From c993e8e815b34129b75e761ad249a526830973b1 Mon Sep 17 00:00:00 2001 From: Anuj Mittal Date: Tue, 5 Apr 2022 10:34:33 +0800 Subject: Remove support for building with LLVM 10 We no longer support building with older branches of OE-Core/meta-clang so remove LLVM 10 specific configurations and patches. Signed-off-by: Anuj Mittal --- conf/machine/include/meta-intel.inc | 6 +- ...tils-Add-metadata-fixing-in-SplitBlockPre.patch | 111 - ...ify-Do-not-use-SCEV-expander-for-IVCount-.patch | 146 - .../files/llvm10-0001-OpenCL-3.0-support.patch | 8259 -------------------- ...lvm10-0001-llvm-spirv-skip-building-tests.patch | 51 - ...2-Add-cl_khr_extended_subgroup-extensions.patch | 812 -- ...g-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch | 33 - ...t-for-cl_ext_float_atomics-in-SPIRVWriter.patch | 982 --- ...-Memory-leak-fix-for-Managed-Static-Mutex.patch | 35 - .../llvm10-0004-Remove-repo-name-in-LLVM-IR.patch | 49 - ...AGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch | 47 - ...ng-ParseCommandLineOptions-in-BackendUtil.patch | 53 - .../llvm10-0007-support-cl_ext_float_atomics.patch | 377 - .../llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch | 96 - .../files/llvm10-0009-ispc-10_0_fix_for_1788.patch | 105 - .../files/llvm10-0010-ispc-10_0_fix_for_1793.patch | 43 - .../files/llvm10-0011-ispc-10_0_fix_for_1844.patch | 34 - ...0-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch | 40 - ...10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch | 61 - ...014-ispc-10_0_packed_load_store_avx512skx.patch | 97 - ...-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch | 173 - ...e-call-site-info-only-for-g-optimizations.patch | 550 -- ...argetOptionsCommandFlags.inc-and-CommandF.patch | 2243 ------ .../llvm10-basic-block-sections-support.patch | 237 - .../clang/llvm-project-source.bbappend | 34 - ...-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch | 35 - ...nly-static-libraries-linked-for-native-bu.patch | 42 - .../opencl-clang/opencl-clang_10.0.0.bb | 15 - 28 files changed, 2 insertions(+), 14764 deletions(-) delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-OpenCL-3.0-support.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-llvm-spirv-skip-building-tests.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0004-Remove-repo-name-in-LLVM-IR.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0007-support-cl_ext_float_atomics.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch delete mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-basic-block-sections-support.patch delete mode 100644 dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch delete mode 100644 dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-make-sure-only-static-libraries-linked-for-native-bu.patch delete mode 100644 dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_10.0.0.bb diff --git a/conf/machine/include/meta-intel.inc b/conf/machine/include/meta-intel.inc index 452a423d..511179e3 100644 --- a/conf/machine/include/meta-intel.inc +++ b/conf/machine/include/meta-intel.inc @@ -21,10 +21,8 @@ PREFERRED_PROVIDER_libva-utils = "libva-intel-utils" PREFERRED_PROVIDER_libva-utils-native = "libva-intel-utils-native" PREFERRED_PROVIDER_nativesdk-libva-utils = "nativesdk-libva-intel-utils" -PREFERRED_VERSION_opencl-clang ?= "${@bb.utils.contains('LLVMVERSION', '10.0.1', '10.0.0', \ - bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d), d)}" -PREFERRED_VERSION_opencl-clang-native ?= "${@bb.utils.contains('LLVMVERSION', '10.0.1', '10.0.0', \ - bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d), d)}" +PREFERRED_VERSION_opencl-clang ?= "${@bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d)}" +PREFERRED_VERSION_opencl-clang-native ?= "${@bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d)}" XSERVER_X86_ASPEED_AST = "xf86-video-ast \ " diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch deleted file mode 100644 index cd519971..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch +++ /dev/null @@ -1,111 +0,0 @@ -From eeb816d95f0910bd246e37bb2bb3923acf0edf6b Mon Sep 17 00:00:00 2001 -From: Aleksander Us -Date: Mon, 26 Aug 2019 15:47:41 +0300 -Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in - SplitBlockPredecessors. - -In case when BB is header of some loop and predecessor is latch of -this loop, metadata was not attached to newly created basic block. -This led to loss of loop metadata for other passes. - -Upstream-Status: Submitted [https://reviews.llvm.org/D66892] - -https://github.com/intel/llvm-patches/commit/8af4449e2d201707f7f2f832b473a0439e255f32 - -Signed-off-by: Naveen Saini ---- - lib/Transforms/Utils/BasicBlockUtils.cpp | 23 ++++++++---- - test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++ - 2 files changed, 52 insertions(+), 7 deletions(-) - create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll - -diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp -index 5fa371377c8..3a90ae061fb 100644 ---- a/lib/Transforms/Utils/BasicBlockUtils.cpp -+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp -@@ -579,24 +579,33 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, - - // The new block unconditionally branches to the old block. - BranchInst *BI = BranchInst::Create(BB, NewBB); -+ bool IsBBHeader = LI && LI->isLoopHeader(BB); -+ Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr; - // Splitting the predecessors of a loop header creates a preheader block. -- if (LI && LI->isLoopHeader(BB)) -+ if (IsBBHeader) - // Using the loop start line number prevents debuggers stepping into the - // loop body for this instruction. -- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc()); -+ BI->setDebugLoc(BBLoop->getStartLoc()); - else - BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); - - // Move the edges from Preds to point to NewBB instead of BB. -- for (unsigned i = 0, e = Preds.size(); i != e; ++i) { -+ for (BasicBlock *Pred : Preds) { -+ Instruction *PI = Pred->getTerminator(); - // This is slightly more strict than necessary; the minimum requirement - // is that there be no more than one indirectbr branching to BB. And - // all BlockAddress uses would need to be updated. -- assert(!isa(Preds[i]->getTerminator()) && -+ assert(!isa(PI) && - "Cannot split an edge from an IndirectBrInst"); -- assert(!isa(Preds[i]->getTerminator()) && -- "Cannot split an edge from a CallBrInst"); -- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); -+ assert(!isa(PI) && "Cannot split an edge from a CallBrInst"); -+ if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) { -+ // Update loop metadata if it exists. -+ if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) { -+ BI->setMetadata(LLVMContext::MD_loop, LoopMD); -+ PI->setMetadata(LLVMContext::MD_loop, nullptr); -+ } -+ } -+ PI->replaceUsesOfWith(BB, NewBB); - } - - // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI -diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll -new file mode 100644 -index 00000000000..c15c92fe3ae ---- /dev/null -+++ b/test/Transforms/LoopSimplify/loop_metadata.ll -@@ -0,0 +1,36 @@ -+; RUN: opt -S -loop-simplify < %s | FileCheck %s -+ -+; CHECK: for.cond.loopexit: -+; CHECK: br label %for.cond, !llvm.loop !0 -+; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit -+ -+define void @foo() { -+entry: -+ br label %for.cond -+ -+for.cond: ; preds = %for.cond1, %entry -+ %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ] -+ %cmp = icmp ult i32 %j, 8 -+ br i1 %cmp, label %for.body, label %for.end -+ -+for.body: ; preds = %for.cond -+ %dummy1 = add i32 1, 1 -+ %add = add nuw nsw i32 %j, 1 -+ br label %for.cond1 -+ -+for.cond1: ; preds = %for.body1, %for.body -+ %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ] -+ %cmp1 = icmp ult i32 %i.0, 8 -+ br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0 -+ -+for.body1: ; preds = %for.cond1 -+ %dummy2 = add i32 1, 1 -+ %inc = add nuw nsw i32 %i.0, 1 -+ br label %for.cond1 -+ -+for.end: ; preds = %for.cond -+ ret void -+} -+ -+!0 = distinct !{!0, !1} -+!1 = !{!"llvm.loop.unroll.full"} --- -2.18.0 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch deleted file mode 100644 index 48307deb..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch +++ /dev/null @@ -1,146 +0,0 @@ -From 35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001 -From: Aleksander Us -Date: Mon, 26 Aug 2019 15:45:47 +0300 -Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in - LFTR when possible. - -SCEV analysis cannot properly cache instruction with poison flags -(for example, add nsw outside of loop will not be reused by expander). -This can lead to generating of additional instructions by SCEV expander. - -Example IR: - - ... - %maxval = add nuw nsw i32 %a1, %a2 - ... -for.body: - ... - %cmp22 = icmp ult i32 %ivadd, %maxval - br i1 %cmp22, label %for.body, label %for.end - ... - -SCEV expander will generate copy of %maxval in preheader but without -nuw/nsw flags. This can be avoided by explicit check that iv count -value gives the same SCEV expressions as calculated by LFTR. - -Upstream-Status: Submitted [https://reviews.llvm.org/D66890] - -https://github.com/intel/llvm-patches/commit/fd6a6c97341a56fd21bc32bc940afea751312e8f - -Signed-off-by: Naveen Saini ---- - lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++- - test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++ - test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++----- - test/Transforms/IndVarSimplify/udiv.ll | 1 + - 4 files changed, 38 insertions(+), 7 deletions(-) - create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll - -diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp -index f9fc698a4a9..5e04dac8aa6 100644 ---- a/lib/Transforms/Scalar/IndVarSimplify.cpp -+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp -@@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, - if (UsePostInc) - IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); - -+ // If computed limit is equal to old limit then do not use SCEV expander -+ // because it can lost NUW/NSW flags and create extra instructions. -+ BranchInst *BI = cast(ExitingBB->getTerminator()); -+ if (ICmpInst *Cmp = dyn_cast(BI->getOperand(0))) { -+ Value *Limit = Cmp->getOperand(0); -+ if (!L->isLoopInvariant(Limit)) -+ Limit = Cmp->getOperand(1); -+ if (SE->getSCEV(Limit) == IVLimit) -+ return Limit; -+ } -+ - // Expand the code for the iteration count. - assert(SE->isLoopInvariant(IVLimit, L) && - "Computed iteration count is not loop invariant!"); -@@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, - // SCEV expression (IVInit) for a pointer type IV value (IndVar). - Type *LimitTy = ExitCount->getType()->isPointerTy() ? - IndVar->getType() : ExitCount->getType(); -- BranchInst *BI = cast(ExitingBB->getTerminator()); - return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); - } - } -diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll -new file mode 100644 -index 00000000000..abd1cbb6c51 ---- /dev/null -+++ b/test/Transforms/IndVarSimplify/add_nsw.ll -@@ -0,0 +1,23 @@ -+; RUN: opt -indvars -S %s | FileCheck %s -+ -+target datalayout = "e-p:32:32-i64:64-n8:16:32" -+ -+; CHECK: for.body.preheader: -+; CHECK-NOT: add -+; CHECK: for.body: -+ -+define void @foo(i32 %a1, i32 %a2) { -+entry: -+ %maxval = add nuw nsw i32 %a1, %a2 -+ %cmp = icmp slt i32 %maxval, 1 -+ br i1 %cmp, label %for.end, label %for.body -+ -+for.body: ; preds = %entry, %for.body -+ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ] -+ %add31 = add nuw nsw i32 %j.02, 1 -+ %cmp22 = icmp slt i32 %add31, %maxval -+ br i1 %cmp22, label %for.body, label %for.end -+ -+for.end: ; preds = %for.body -+ ret void -+} -diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll -index 14ae9738696..509d662b767 100644 ---- a/test/Transforms/IndVarSimplify/lftr-reuse.ll -+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll -@@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { - ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] - ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] - ; CHECK: outer.preheader: --; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1 - ; CHECK-NEXT: br label [[OUTER:%.*]] - ; CHECK: outer: --; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] --; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ] -+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] - ; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] - ; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 - ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] -@@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { - ; CHECK: inner: - ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ] - ; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 --; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]] -+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]] - ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] - ; CHECK: outer.inc.loopexit: - ; CHECK-NEXT: br label [[OUTER_INC]] - ; CHECK: outer.inc: - ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 --; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1 --; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]] -+; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]] - ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] - ; CHECK: exit.loopexit: - ; CHECK-NEXT: br label [[EXIT]] -diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll -index b3f2c2a6a66..3530343ef4a 100644 ---- a/test/Transforms/IndVarSimplify/udiv.ll -+++ b/test/Transforms/IndVarSimplify/udiv.ll -@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind - ; CHECK-LABEL: @foo( - ; CHECK: for.body.preheader: - ; CHECK-NOT: udiv -+; CHECK: for.body: - - define void @foo(double* %p, i64 %n) nounwind { - entry: --- -2.18.0 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-OpenCL-3.0-support.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-OpenCL-3.0-support.patch deleted file mode 100644 index 1ab00df0..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-OpenCL-3.0-support.patch +++ /dev/null @@ -1,8259 +0,0 @@ -From 8dbdb2f26674a938ff43b5bfe5b3bf3d1117f9e4 Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Wed, 7 Apr 2021 16:36:10 +0800 -Subject: [PATCH 1/7] OpenCL 3.0 support - -Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0001-OpenCL-3.0-support.patch] -Signed-off-by: Anton Zabaznov -Signed-off-by: Naveen Saini ---- - clang/include/clang/Basic/Builtins.def | 65 +- - clang/include/clang/Basic/Builtins.h | 13 +- - .../clang/Basic/DiagnosticParseKinds.td | 2 + - .../clang/Basic/DiagnosticSemaKinds.td | 7 + - clang/include/clang/Basic/LangOptions.def | 2 + - clang/include/clang/Basic/LangStandards.def | 4 + - .../include/clang/Basic/OpenCLExtensions.def | 15 + - clang/include/clang/Basic/OpenCLOptions.h | 175 +- - clang/include/clang/Driver/Options.td | 2 +- - clang/include/clang/Sema/Sema.h | 9 + - clang/lib/AST/ASTContext.cpp | 3 +- - clang/lib/Basic/Builtins.cpp | 28 +- - clang/lib/Basic/TargetInfo.cpp | 11 + - clang/lib/Basic/Targets.cpp | 1 - - clang/lib/CodeGen/CodeGenFunction.cpp | 6 +- - clang/lib/Frontend/CompilerInvocation.cpp | 22 +- - clang/lib/Frontend/InitPreprocessor.cpp | 6 +- - clang/lib/Headers/opencl-c-base.h | 75 +- - clang/lib/Headers/opencl-c.h | 3228 ++++++++++++++--- - clang/lib/Parse/ParseDecl.cpp | 12 +- - clang/lib/Parse/ParsePragma.cpp | 10 +- - clang/lib/Sema/OpenCLBuiltins.td | 49 +- - clang/lib/Sema/Sema.cpp | 47 +- - clang/lib/Sema/SemaChecking.cpp | 38 +- - clang/lib/Sema/SemaDecl.cpp | 15 +- - clang/lib/Sema/SemaDeclAttr.cpp | 9 +- - clang/lib/Sema/SemaDeclCXX.cpp | 10 + - clang/lib/Sema/SemaLookup.cpp | 19 +- - clang/lib/Sema/SemaType.cpp | 14 +- - .../CodeGenOpenCL/addr-space-struct-arg.cl | 7 +- - .../address-spaces-conversions.cl | 2 + - .../CodeGenOpenCL/address-spaces-mangling.cl | 3 +- - clang/test/CodeGenOpenCL/address-spaces.cl | 4 + - .../amdgcn-automatic-variable.cl | 1 + - .../CodeGenOpenCL/amdgpu-sizeof-alignof.cl | 21 +- - .../CodeGenOpenCL/arm-integer-dot-product.cl | 1 + - .../test/CodeGenOpenCL/cl-uniform-wg-size.cl | 2 + - clang/test/CodeGenOpenCL/fpmath.cl | 2 + - .../generic-address-space-feature.cl | 28 + - .../intel-subgroups-avc-ext-types.cl | 1 + - .../kernels-have-spir-cc-by-default.cl | 3 + - clang/test/CodeGenOpenCL/logical-ops.cl | 1 + - clang/test/CodeGenOpenCL/no-half.cl | 1 + - clang/test/CodeGenOpenCL/pipe_builtin.cl | 3 + - clang/test/CodeGenOpenCL/pipe_types.cl | 1 + - clang/test/CodeGenOpenCL/printf.cl | 2 + - clang/test/CodeGenOpenCL/unroll-hint.cl | 1 + - clang/test/Driver/autocomplete.c | 2 + - clang/test/Driver/opencl.cl | 2 + - clang/test/Driver/unknown-std.cl | 1 + - clang/test/Frontend/stdlang.c | 1 + - clang/test/Headers/opencl-c-header.cl | 7 +- - clang/test/Index/pipe-size.cl | 7 + - clang/test/Preprocessor/predefined-macros.c | 13 + - .../Sema/feature-extensions-simult-support.cl | 75 + - clang/test/Sema/features-ignore-pragma.cl | 24 + - clang/test/Sema/opencl-features-pipes.cl | 18 + - clang/test/Sema/opencl-features.cl | 128 + - clang/test/Sema/pipe_builtins_feature.cl | 21 + - .../address-spaces-conversions-cl2.0.cl | 3 + - clang/test/SemaOpenCL/address-spaces.cl | 1 + - .../SemaOpenCL/cl20-device-side-enqueue.cl | 16 +- - .../SemaOpenCL/forget-unsupported-builtins.cl | 22 + - clang/test/SemaOpenCL/image-features.cl | 20 + - .../SemaOpenCL/invalid-pipe-builtin-cl2.0.cl | 1 + - clang/test/SemaOpenCL/storageclass-cl20.cl | 1 + - .../TableGen/ClangOpenCLBuiltinEmitter.cpp | 35 +- - 67 files changed, 3656 insertions(+), 723 deletions(-) - create mode 100644 clang/test/CodeGenOpenCL/generic-address-space-feature.cl - create mode 100644 clang/test/Sema/feature-extensions-simult-support.cl - create mode 100644 clang/test/Sema/features-ignore-pragma.cl - create mode 100644 clang/test/Sema/opencl-features-pipes.cl - create mode 100644 clang/test/Sema/opencl-features.cl - create mode 100644 clang/test/Sema/pipe_builtins_feature.cl - create mode 100644 clang/test/SemaOpenCL/forget-unsupported-builtins.cl - create mode 100644 clang/test/SemaOpenCL/image-features.cl - -diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def -index 1a6c85ce2dd3..b76e277f0337 100644 ---- a/clang/include/clang/Basic/Builtins.def -+++ b/clang/include/clang/Basic/Builtins.def -@@ -106,6 +106,10 @@ - # define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) - #endif - -+#if defined(BUILTIN) && !defined(OPENCLBUILTIN) -+# define OPENCLBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS) -+#endif -+ - // Standard libc/libm functions: - BUILTIN(__builtin_atan2 , "ddd" , "Fne") - BUILTIN(__builtin_atan2f, "fff" , "Fne") -@@ -1514,50 +1518,54 @@ BUILTIN(__builtin_coro_param, "bv*v*", "n") - - // OpenCL v2.0 s6.13.16, s9.17.3.5 - Pipe functions. - // We need the generic prototype, since the packet type could be anything. --LANGBUILTIN(read_pipe, "i.", "tn", OCLC20_LANG) --LANGBUILTIN(write_pipe, "i.", "tn", OCLC20_LANG) -+OPENCLBUILTIN(read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") -+OPENCLBUILTIN(write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") - --LANGBUILTIN(reserve_read_pipe, "i.", "tn", OCLC20_LANG) --LANGBUILTIN(reserve_write_pipe, "i.", "tn", OCLC20_LANG) -+OPENCLBUILTIN(reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") -+OPENCLBUILTIN(reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") - --LANGBUILTIN(commit_write_pipe, "v.", "tn", OCLC20_LANG) --LANGBUILTIN(commit_read_pipe, "v.", "tn", OCLC20_LANG) -+OPENCLBUILTIN(commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") -+OPENCLBUILTIN(commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") - --LANGBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) --LANGBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) -+OPENCLBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") -+OPENCLBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") - --LANGBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) --LANGBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) -+OPENCLBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") -+OPENCLBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") - --LANGBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) --LANGBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) -+OPENCLBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") -+OPENCLBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") - --LANGBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) --LANGBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) -+OPENCLBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") -+OPENCLBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") - --LANGBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC20_LANG) --LANGBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC20_LANG) -+OPENCLBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_pipes") -+OPENCLBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_pipes") - - // OpenCL v2.0 s6.13.17 - Enqueue kernel functions. - // Custom builtin check allows to perform special check of passed block arguments. --LANGBUILTIN(enqueue_kernel, "i.", "tn", OCLC20_LANG) --LANGBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC20_LANG) --LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", OCLC20_LANG) --LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", OCLC20_LANG) --LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC20_LANG) -+OPENCLBUILTIN(enqueue_kernel, "i.", "tn", OCLC2P_LANG, -+ "__opencl_c_device_enqueue") -+OPENCLBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC2P_LANG, -+ "__opencl_c_device_enqueue") -+OPENCLBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", -+ OCLC2P_LANG, "__opencl_c_device_enqueue") -+OPENCLBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", -+ OCLC2P_LANG, "__opencl_c_device_enqueue") -+OPENCLBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_device_enqueue") - - // OpenCL v2.0 s6.13.9 - Address space qualifier functions. - // FIXME: Pointer parameters of OpenCL builtins should have their address space - // requirement defined. --LANGBUILTIN(to_global, "v*v*", "tn", OCLC20_LANG) --LANGBUILTIN(to_local, "v*v*", "tn", OCLC20_LANG) --LANGBUILTIN(to_private, "v*v*", "tn", OCLC20_LANG) -+OPENCLBUILTIN(to_global, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") -+OPENCLBUILTIN(to_local, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") -+OPENCLBUILTIN(to_private, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") - - // OpenCL half load/store builtin --LANGBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES) --LANGBUILTIN(__builtin_store_halff, "vfh*", "n", ALL_OCLC_LANGUAGES) --LANGBUILTIN(__builtin_load_half, "dhC*", "nc", ALL_OCLC_LANGUAGES) --LANGBUILTIN(__builtin_load_halff, "fhC*", "nc", ALL_OCLC_LANGUAGES) -+OPENCLBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES, "") -+OPENCLBUILTIN(__builtin_store_halff, "vfh*", "n", ALL_OCLC_LANGUAGES, "") -+OPENCLBUILTIN(__builtin_load_half, "dhC*", "nc", ALL_OCLC_LANGUAGES, "") -+OPENCLBUILTIN(__builtin_load_halff, "fhC*", "nc", ALL_OCLC_LANGUAGES, "") - - // Builtins for os_log/os_trace - BUILTIN(__builtin_os_log_format_buffer_size, "zcC*.", "p:0:nut") -@@ -1578,3 +1586,4 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") - #undef BUILTIN - #undef LIBBUILTIN - #undef LANGBUILTIN -+#undef OPENCLBUILTIN -diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h -index e4ed482d9068..713ea4bc267d 100644 ---- a/clang/include/clang/Basic/Builtins.h -+++ b/clang/include/clang/Basic/Builtins.h -@@ -33,13 +33,13 @@ enum LanguageID { - CXX_LANG = 0x4, // builtin for cplusplus only. - OBJC_LANG = 0x8, // builtin for objective-c and objective-c++ - MS_LANG = 0x10, // builtin requires MS mode. -- OCLC20_LANG = 0x20, // builtin for OpenCL C 2.0 only. -+ OCLC2P_LANG = 0x20, // builtin for OpenCL C 2.0+ versions. - OCLC1X_LANG = 0x40, // builtin for OpenCL C 1.x only. - OMP_LANG = 0x80, // builtin requires OpenMP. - ALL_LANGUAGES = C_LANG | CXX_LANG | OBJC_LANG, // builtin for all languages. - ALL_GNU_LANGUAGES = ALL_LANGUAGES | GNU_LANG, // builtin requires GNU mode. - ALL_MS_LANGUAGES = ALL_LANGUAGES | MS_LANG, // builtin requires MS mode. -- ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC20_LANG // builtin for OCLC languages. -+ ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC2P_LANG // builtin for OCLC languages. - }; - - namespace Builtin { -@@ -228,6 +228,10 @@ public: - /// for non-builtins. - bool canBeRedeclared(unsigned ID) const; - -+ bool requiresFeatures(unsigned ID) const { -+ return requiresFeatures(getRecord(ID)); -+ } -+ - private: - const Info &getRecord(unsigned ID) const; - -@@ -235,6 +239,11 @@ private: - bool builtinIsSupported(const Builtin::Info &BuiltinInfo, - const LangOptions &LangOpts); - -+ bool OclBuiltinIsSupported(const Builtin::Info &BuiltinInfo, -+ const LangOptions &LangOpts) const; -+ -+ bool requiresFeatures(const Builtin::Info &BuiltinInfo) const; -+ - /// Helper function for isPrintfLike and isScanfLike. - bool isLike(unsigned ID, unsigned &FormatIdx, bool &HasVAListArg, - const char *Fmt) const; -diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td -index 33adf093693f..43ea63586f91 100644 ---- a/clang/include/clang/Basic/DiagnosticParseKinds.td -+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td -@@ -1199,6 +1199,8 @@ def warn_pragma_unsupported_extension : Warning< - "unsupported OpenCL extension %0 - ignoring">, InGroup; - def warn_pragma_extension_is_core : Warning< - "OpenCL extension %0 is core feature or supported optional core feature - ignoring">, InGroup>, DefaultIgnore; -+def warn_opencl_pragma_feature_ignore : Warning< -+ "OpenCL feature support can't be controlled via pragma, ignoring">, InGroup; - - // OpenCL errors. - def err_opencl_taking_function_address_parser : Error< -diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td -index 917377420505..91648aa27820 100644 ---- a/clang/include/clang/Basic/DiagnosticSemaKinds.td -+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td -@@ -9378,6 +9378,13 @@ def ext_opencl_ext_vector_type_rgba_selector: ExtWarn< - def err_openclcxx_placement_new : Error< - "use of placement new requires explicit declaration">; - -+def err_opencl_pipes_require_feat : Error< -+ "usage of OpenCL pipes requires feature support">; -+def err_opencl_memory_scope_require_feat : Error< -+ "usage of memory scope requires feature support">; -+def err_opencl_memory_ordering_require_feat : Error< -+ "usage of memory ordering requires feature support">; -+ - // MIG routine annotations. - def warn_mig_server_routine_does_not_return_kern_return_t : Warning< - "'mig_server_routine' attribute only applies to routines that return a kern_return_t">, -diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def -index 3319a3123976..406f45c0be69 100644 ---- a/clang/include/clang/Basic/LangOptions.def -+++ b/clang/include/clang/Basic/LangOptions.def -@@ -203,6 +203,8 @@ LANGOPT(OpenCL , 1, 0, "OpenCL") - LANGOPT(OpenCLVersion , 32, 0, "OpenCL C version") - LANGOPT(OpenCLCPlusPlus , 1, 0, "C++ for OpenCL") - LANGOPT(OpenCLCPlusPlusVersion , 32, 0, "C++ for OpenCL version") -+LANGOPT(OpenCLGenericKeyword , 1, 0, "OpenCL generic keyword") -+LANGOPT(OpenCLPipeKeyword , 1, 0, "OpenCL pipe keyword") - LANGOPT(NativeHalfType , 1, 0, "Native half type support") - LANGOPT(NativeHalfArgsAndReturns, 1, 0, "Native half args and returns") - LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns") -diff --git a/clang/include/clang/Basic/LangStandards.def b/clang/include/clang/Basic/LangStandards.def -index 7f1a24db7e9b..69aaba3ff5a2 100644 ---- a/clang/include/clang/Basic/LangStandards.def -+++ b/clang/include/clang/Basic/LangStandards.def -@@ -167,6 +167,9 @@ LANGSTANDARD(opencl12, "cl1.2", - LANGSTANDARD(opencl20, "cl2.0", - OpenCL, "OpenCL 2.0", - LineComment | C99 | Digraphs | HexFloat | OpenCL) -+LANGSTANDARD(opencl30, "cl3.0", -+ OpenCL, "OpenCL 3.0", -+ LineComment | C99 | Digraphs | HexFloat | OpenCL) - LANGSTANDARD(openclcpp, "clc++", - OpenCL, "C++ for OpenCL", - LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 | -@@ -176,6 +179,7 @@ LANGSTANDARD_ALIAS_DEPR(opencl10, "CL") - LANGSTANDARD_ALIAS_DEPR(opencl11, "CL1.1") - LANGSTANDARD_ALIAS_DEPR(opencl12, "CL1.2") - LANGSTANDARD_ALIAS_DEPR(opencl20, "CL2.0") -+LANGSTANDARD_ALIAS_DEPR(opencl30, "CL3.0") - LANGSTANDARD_ALIAS_DEPR(openclcpp, "CLC++") - - // CUDA -diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def -index 517481584313..608f78a13eef 100644 ---- a/clang/include/clang/Basic/OpenCLExtensions.def -+++ b/clang/include/clang/Basic/OpenCLExtensions.def -@@ -93,6 +93,21 @@ OPENCLEXT_INTERNAL(cl_intel_subgroups, 120, ~0U) - OPENCLEXT_INTERNAL(cl_intel_subgroups_short, 120, ~0U) - OPENCLEXT_INTERNAL(cl_intel_device_side_avc_motion_estimation, 120, ~0U) - -+OPENCLEXT_INTERNAL(__opencl_c_pipes, 200, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_generic_address_space, 200, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_work_group_collective_functions, 200, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_atomic_order_acq_rel, 200, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_atomic_order_seq_cst, 200, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_atomic_scope_device, 200, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_atomic_scope_all_devices, 200, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_subgroups, 200, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_3d_image_writes, 100, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_device_enqueue, 200, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_read_write_images, 200, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_program_scope_global_variables, 200, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_fp64, 120, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_int64, 100, ~0U) -+OPENCLEXT_INTERNAL(__opencl_c_images, 100, ~0U) - #undef OPENCLEXT_INTERNAL - - #ifdef OPENCLEXT -diff --git a/clang/include/clang/Basic/OpenCLOptions.h b/clang/include/clang/Basic/OpenCLOptions.h -index 15661154eab5..9a3a8e33cadd 100644 ---- a/clang/include/clang/Basic/OpenCLOptions.h -+++ b/clang/include/clang/Basic/OpenCLOptions.h -@@ -16,11 +16,16 @@ - - #include "clang/Basic/LangOptions.h" - #include "llvm/ADT/StringMap.h" -+#include "llvm/ADT/StringSwitch.h" - - namespace clang { - - /// OpenCL supported extensions and optional core features - class OpenCLOptions { -+ // OpenCL Version -+ unsigned CLVer = 120; -+ bool IsOpenCLCPlusPlus = false; -+ - struct Info { - bool Supported; // Is this option supported - bool Enabled; // Is this option enabled -@@ -31,7 +36,38 @@ class OpenCLOptions { - :Supported(S), Enabled(E), Avail(A), Core(C){} - }; - llvm::StringMap OptMap; -+ - public: -+ void setOpenCLVersion(const LangOptions &LO) { -+ IsOpenCLCPlusPlus = LO.OpenCLCPlusPlus; -+ CLVer = IsOpenCLCPlusPlus ? 200 : LO.OpenCLVersion; -+ } -+ -+ // Get extension which is semantically equivalent to a given feature -+ // if exists (e.g. __opencl_c_subgroups -> cl_khr_subgroups) -+ llvm::Optional getEquivalentExtension(StringRef Feature) const { -+ return llvm::StringSwitch>(Feature) -+ .Case("__opencl_c_3d_image_writes", -+ Optional("cl_khr_3d_image_writes")) -+ .Case("__opencl_c_subgroups", Optional("cl_khr_subgroups")) -+ .Case("__opencl_c_fp64", Optional("cl_khr_fp64")) -+ .Default(Optional()); -+ } -+ -+ // Same as above but for extensions -+ llvm::Optional getEquivalentFeature(StringRef Extension) const { -+ return llvm::StringSwitch>(Extension) -+ .Case("cl_khr_3d_image_writes", -+ Optional("__opencl_c_3d_image_writes")) -+ .Case("cl_khr_subgroups", Optional("__opencl_c_subgroups")) -+ .Case("cl_khr_fp64", Optional("__opencl_c_fp64")) -+ .Default(Optional()); -+ } -+ -+ bool isFeature(llvm::StringRef Ext) const { -+ return Ext.startswith("__opencl_c"); -+ } -+ - bool isKnown(llvm::StringRef Ext) const { - return OptMap.find(Ext) != OptMap.end(); - } -@@ -42,32 +78,88 @@ public: - - // Is supported as either an extension or an (optional) core feature for - // OpenCL version \p CLVer. -- bool isSupported(llvm::StringRef Ext, const LangOptions &LO) const { -+ bool isSupported(llvm::StringRef Ext) const { - // In C++ mode all extensions should work at least as in v2.0. -- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; - auto I = OptMap.find(Ext)->getValue(); - return I.Supported && I.Avail <= CLVer; - } - - // Is supported (optional) OpenCL core features for OpenCL version \p CLVer. - // For supported extension, return false. -- bool isSupportedCore(llvm::StringRef Ext, const LangOptions &LO) const { -+ bool isSupportedCore(llvm::StringRef Ext) const { - // In C++ mode all extensions should work at least as in v2.0. -- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; - auto I = OptMap.find(Ext)->getValue(); - return I.Supported && I.Avail <= CLVer && I.Core != ~0U && CLVer >= I.Core; - } - - // Is supported OpenCL extension for OpenCL version \p CLVer. - // For supported (optional) core feature, return false. -- bool isSupportedExtension(llvm::StringRef Ext, const LangOptions &LO) const { -+ bool isSupportedExtension(llvm::StringRef Ext) const { - // In C++ mode all extensions should work at least as in v2.0. -- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; - auto I = OptMap.find(Ext)->getValue(); - return I.Supported && I.Avail <= CLVer && (I.Core == ~0U || CLVer < I.Core); - } - -+ // Support features whose support is directly related to the -+ // specific OpenCL version. For example, OpenCL 2.0 supports -+ // all features that are optional in 3.0 -+ void adjustFeatures() { -+ // Support int64 by default (assume compiling for FULL profile) -+ OptMap["__opencl_c_int64"].Supported = true; -+ -+ if (CLVer >= 300) { -+ // Simultaneously support extension and corresponding feature -+ for (llvm::StringRef F : -+ {"__opencl_c_subgroups", "__opencl_c_3d_image_writes", -+ "__opencl_c_fp64"}) { -+ auto Ext = getEquivalentExtension(F); -+ OptMap[*Ext].Supported = OptMap[F].Supported; -+ } -+ -+ // OpenCL C compilers that define the feature macro __opencl_c_pipes or -+ // or __opencl_c_device_enqueue must also define the -+ // feature macro __opencl_c_generic_address_space. -+ if (OptMap["__opencl_c_pipes"].Supported || -+ OptMap["__opencl_c_device_enqueue"].Supported) -+ OptMap["__opencl_c_generic_address_space"].Supported = true; -+ -+ // OpenCL C compilers that define the feature macro -+ // __opencl_c_3d_image_writes or __opencl_c_read_write_images must also -+ // define the feature macro __opencl_c_images -+ if (OptMap["__opencl_c_3d_image_writes"].Supported || -+ OptMap["__opencl_c_read_write_images"].Supported) -+ OptMap["__opencl_c_images"].Supported = true; -+ -+ // All other features are already supported with options -+ // or in target settings -+ return; -+ } -+ -+ auto FeaturesRange = llvm::make_filter_range( -+ OptMap, [&](llvm::StringMapEntry &OptVal) { -+ auto Opt = OptVal.getKey(); -+ return isFeature(Opt); -+ }); -+ -+ for (auto &It : FeaturesRange) { -+ auto &Info = It.getValue(); -+ // For OpenCL version less then 3.0 some -+ // features should be supported simulateneously -+ // with specific extension -+ if (Optional Ext = getEquivalentExtension(It.getKey())) -+ Info.Supported = Info.Enabled = OptMap[*Ext].Supported; -+ else if (Info.Avail <= CLVer) -+ Info.Supported = Info.Enabled = true; -+ } -+ } -+ - void enable(llvm::StringRef Ext, bool V = true) { -+ // Ignore disabling extensions if corresponding features -+ // already supported for OpenCL version higher then 3.0 -+ if (CLVer >= 300) -+ if (Optional F = getEquivalentFeature(Ext)) -+ if (V != OptMap[*F].Enabled) -+ return; - OptMap[Ext].Enabled = V; - } - -@@ -96,7 +188,7 @@ public: - OptMap[Ext].Supported = V; - } - -- OpenCLOptions(){ -+ OpenCLOptions() { - #define OPENCLEXT_INTERNAL(Ext, AvailVer, CoreVer) \ - OptMap[#Ext].Avail = AvailVer; \ - OptMap[#Ext].Core = CoreVer; -@@ -104,35 +196,86 @@ public: - } - - void addSupport(const OpenCLOptions &Opts) { -+ assert(IsOpenCLCPlusPlus == Opts.IsOpenCLCPlusPlus && CLVer == Opts.CLVer); - for (auto &I:Opts.OptMap) -- if (I.second.Supported) -+ if (I.second.Supported) { - OptMap[I.getKey()].Supported = true; -+ // All features are enabled as they are supported -+ if (isFeature(I.getKey())) -+ OptMap[I.getKey()].Enabled = true; -+ } -+ if (CLVer >= 300) { -+ // Enabling extensions with respect to features -+ for (llvm::StringRef Ext : -+ {"cl_khr_3d_image_writes", "cl_khr_subgroups", "cl_khr_fp64"}) { -+ auto Feature = getEquivalentFeature(Ext); -+ enable(Ext, OptMap[*Feature].Enabled); -+ } -+ } - } - - void copy(const OpenCLOptions &Opts) { -+ CLVer = Opts.CLVer; -+ IsOpenCLCPlusPlus = Opts.IsOpenCLCPlusPlus; - OptMap = Opts.OptMap; - } - - // Turn on or off support of all options. - void supportAll(bool On = true) { -- for (llvm::StringMap::iterator I = OptMap.begin(), -- E = OptMap.end(); I != E; ++I) -- I->second.Supported = On; -+ for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); -+ I != E; ++I) -+ if (!isFeature(I->getKey())) -+ I->second.Supported = On; - } - - void disableAll() { -- for (llvm::StringMap::iterator I = OptMap.begin(), -- E = OptMap.end(); I != E; ++I) -- I->second.Enabled = false; -+ for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); -+ I != E; ++I) { -+ auto Ext = I->getKey(); -+ if (!isFeature(Ext)) -+ enable(Ext, false); -+ } - } - -- void enableSupportedCore(LangOptions LO) { -+ void enableSupportedCore() { - for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); - I != E; ++I) -- if (isSupportedCore(I->getKey(), LO)) -+ if (isSupportedCore(I->getKey())) - I->second.Enabled = true; - } - -+ // This enum specifies how OpenCL versions map into values -+ // for encoding. This is used when generating built-ins -+ // from tablegen -+ enum OpenCLVersionsEncodings : unsigned short { -+ OPENCL_C_100_CODE = 0x1, -+ OPENCL_C_110_CODE = 0x2, -+ OPENCL_C_120_CODE = 0x4, -+ OPENCL_C_200_CODE = 0x8, -+ OPENCL_C_300_CODE = 0x10, -+ OPENCL_C_ALL_CODE = 0x1f -+ }; -+ -+ // Encode version into single integer -+ static unsigned short EncodeVersion(unsigned OpenCLVersion) { -+ switch (OpenCLVersion) { -+ default: -+ llvm_unreachable("Unknown OpenCL version"); -+ case 0: -+ return OpenCLVersionsEncodings::OPENCL_C_ALL_CODE; -+ case 100: -+ return OpenCLVersionsEncodings::OPENCL_C_100_CODE; -+ case 110: -+ return OpenCLVersionsEncodings::OPENCL_C_110_CODE; -+ case 120: -+ return OpenCLVersionsEncodings::OPENCL_C_120_CODE; -+ case 200: -+ return OpenCLVersionsEncodings::OPENCL_C_200_CODE; -+ case 300: -+ return OpenCLVersionsEncodings::OPENCL_C_300_CODE; -+ } -+ } -+ - friend class ASTWriter; - friend class ASTReader; - }; -diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td -index 391c895a453b..e03c22c749ad 100644 ---- a/clang/include/clang/Driver/Options.td -+++ b/clang/include/clang/Driver/Options.td -@@ -522,7 +522,7 @@ def cl_mad_enable : Flag<["-"], "cl-mad-enable">, Group, Flags<[CC - def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group, Flags<[CC1Option]>, - HelpText<"OpenCL only. Allow use of less precise no signed zeros computations in the generated binary.">; - def cl_std_EQ : Joined<["-"], "cl-std=">, Group, Flags<[CC1Option]>, -- HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,clc++,CLC++">; -+ HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0,clc++,CLC++">; - def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group, Flags<[CC1Option]>, - HelpText<"OpenCL only. Allow denormals to be flushed to zero.">; - def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group, Flags<[CC1Option]>, -diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h -index 842e49602274..d8ac91bc8a31 100644 ---- a/clang/include/clang/Sema/Sema.h -+++ b/clang/include/clang/Sema/Sema.h -@@ -9632,6 +9632,10 @@ public: - /// \return true if type is disabled. - bool checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E); - -+ bool checkOpenCLSubgroupExtForCallExpr(CallExpr *Call); -+ -+ bool isSupportedOpenCLOMemoryOrdering(int64_t Ordering) const; -+ - //===--------------------------------------------------------------------===// - // OpenMP directives and clauses. - // -@@ -11102,6 +11106,11 @@ public: - /// that the user intended an assignment used as condition. - void DiagnoseEqualityWithExtraParens(ParenExpr *ParenE); - -+ template -+ void DiagnoseOpenCLRequiresOption(llvm::StringRef OpenCLOptName, -+ DiagLocT DiagLoc, DiagInfoT DiagInfo, -+ unsigned Selector, SourceRange SrcRange); -+ - /// CheckCXXBooleanCondition - Returns true if conversion to bool is invalid. - ExprResult CheckCXXBooleanCondition(Expr *CondExpr, bool IsConstexpr = false); - -diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp -index 1be72efe4de8..2a81fbcde79d 100644 ---- a/clang/lib/AST/ASTContext.cpp -+++ b/clang/lib/AST/ASTContext.cpp -@@ -1490,7 +1490,8 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, - ObjCSuperType = QualType(); - - // void * type -- if (LangOpts.OpenCLVersion >= 200) { -+ if (Target.getSupportedOpenCLOpts().isEnabled( -+ "__opencl_c_generic_address_space")) { - auto Q = VoidTy.getQualifiers(); - Q.setAddressSpace(LangAS::opencl_generic); - VoidPtrTy = getPointerType(getCanonicalType( -diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp -index 0cd89df41b67..7a3067345098 100644 ---- a/clang/lib/Basic/Builtins.cpp -+++ b/clang/lib/Basic/Builtins.cpp -@@ -23,6 +23,8 @@ static const Builtin::Info BuiltinInfo[] = { - { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr }, - #define LANGBUILTIN(ID, TYPE, ATTRS, LANGS) \ - { #ID, TYPE, ATTRS, nullptr, LANGS, nullptr }, -+#define OPENCLBUILTIN(ID, TYPE, ATTRS, LANGS, FEATURE) \ -+ {#ID, TYPE, ATTRS, nullptr, LANGS, FEATURE}, - #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, LANGS) \ - { #ID, TYPE, ATTRS, HEADER, LANGS, nullptr }, - #include "clang/Basic/Builtins.def" -@@ -69,16 +71,17 @@ bool Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo, - bool ObjCUnsupported = !LangOpts.ObjC && BuiltinInfo.Langs == OBJC_LANG; - bool OclC1Unsupported = (LangOpts.OpenCLVersion / 100) != 1 && - (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES ) == OCLC1X_LANG; -- bool OclC2Unsupported = -- (LangOpts.OpenCLVersion != 200 && !LangOpts.OpenCLCPlusPlus) && -- (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC20_LANG; -+ bool OclC2PUnsupported = -+ (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC2P_LANG && -+ ((LangOpts.OpenCLVersion < 200 && !LangOpts.OpenCLCPlusPlus) || -+ !OclBuiltinIsSupported(BuiltinInfo, LangOpts)); - bool OclCUnsupported = !LangOpts.OpenCL && - (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES); - bool OpenMPUnsupported = !LangOpts.OpenMP && BuiltinInfo.Langs == OMP_LANG; - bool CPlusPlusUnsupported = - !LangOpts.CPlusPlus && BuiltinInfo.Langs == CXX_LANG; - return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported && -- !OclC1Unsupported && !OclC2Unsupported && !OpenMPUnsupported && -+ !OclC1Unsupported && !OclC2PUnsupported && !OpenMPUnsupported && - !GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported && - !CPlusPlusUnsupported; - } -@@ -191,3 +194,20 @@ bool Builtin::Context::canBeRedeclared(unsigned ID) const { - (!hasReferenceArgsOrResult(ID) && - !hasCustomTypechecking(ID)); - } -+ -+bool Builtin::Context::OclBuiltinIsSupported( -+ const Builtin::Info &BuiltinInfo, const LangOptions &LangOpts) const { -+ if (!requiresFeatures(BuiltinInfo)) -+ return true; -+ -+ return llvm::StringSwitch(BuiltinInfo.Features) -+ .Case("__opencl_c_device_enqueue", LangOpts.Blocks) -+ .Case("__opencl_c_generic_address_space", LangOpts.OpenCLGenericKeyword) -+ .Case("__opencl_c_pipes", LangOpts.OpenCLPipeKeyword) -+ .Default(false); -+} -+ -+bool Builtin::Context::requiresFeatures( -+ const Builtin::Info &BuiltinInfo) const { -+ return BuiltinInfo.Features && llvm::StringRef(BuiltinInfo.Features) != ""; -+} -diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp -index 3a21a19e1f19..795311d5934e 100644 ---- a/clang/lib/Basic/TargetInfo.cpp -+++ b/clang/lib/Basic/TargetInfo.cpp -@@ -377,6 +377,17 @@ void TargetInfo::adjust(LangOptions &Opts) { - HalfFormat = &llvm::APFloat::IEEEhalf(); - FloatFormat = &llvm::APFloat::IEEEsingle(); - LongDoubleFormat = &llvm::APFloat::IEEEquad(); -+ -+ auto &SupportedOCLOpts = getTargetOpts().SupportedOpenCLOptions; -+ -+ SupportedOCLOpts.setOpenCLVersion(Opts); -+ SupportedOCLOpts.adjustFeatures(); -+ -+ if (!Opts.OpenCLCPlusPlus && Opts.OpenCLVersion >= 200) -+ Opts.Blocks = SupportedOCLOpts.isSupported("__opencl_c_device_enqueue"); -+ Opts.OpenCLGenericKeyword = -+ SupportedOCLOpts.isSupported("__opencl_c_generic_address_space"); -+ Opts.OpenCLPipeKeyword = SupportedOCLOpts.isSupported("__opencl_c_pipes"); - } - - if (Opts.LongDoubleSize) { -diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp -index c063f8ca4472..b394be18a234 100644 ---- a/clang/lib/Basic/Targets.cpp -+++ b/clang/lib/Basic/Targets.cpp -@@ -39,7 +39,6 @@ - #include "clang/Basic/Diagnostic.h" - #include "llvm/ADT/StringExtras.h" - #include "llvm/ADT/Triple.h" -- - using namespace clang; - - namespace clang { -diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp -index 648e6d9c214a..6d839fa61abc 100644 ---- a/clang/lib/CodeGen/CodeGenFunction.cpp -+++ b/clang/lib/CodeGen/CodeGenFunction.cpp -@@ -2303,11 +2303,11 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, - std::string MissingFeature; - if (BuiltinID) { - SmallVector ReqFeatures; -- const char *FeatureList = -- CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); - // Return if the builtin doesn't have any required features. -- if (!FeatureList || StringRef(FeatureList) == "") -+ if (!CGM.getContext().BuiltinInfo.requiresFeatures(BuiltinID)) - return; -+ const char *FeatureList = -+ CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); - StringRef(FeatureList).split(ReqFeatures, ','); - if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature)) - CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature) -diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp -index e98a407ac42f..18fa06bf3c6d 100644 ---- a/clang/lib/Frontend/CompilerInvocation.cpp -+++ b/clang/lib/Frontend/CompilerInvocation.cpp -@@ -2298,6 +2298,8 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, - Opts.OpenCLVersion = 120; - else if (LangStd == LangStandard::lang_opencl20) - Opts.OpenCLVersion = 200; -+ else if (LangStd == LangStandard::lang_opencl30) -+ Opts.OpenCLVersion = 300; - else if (LangStd == LangStandard::lang_openclcpp) - Opts.OpenCLCPlusPlusVersion = 100; - -@@ -2498,14 +2500,15 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, - // -cl-std only applies for OpenCL language standards. - // Override the -std option in this case. - if (const Arg *A = Args.getLastArg(OPT_cl_std_EQ)) { -- LangStandard::Kind OpenCLLangStd -- = llvm::StringSwitch(A->getValue()) -- .Cases("cl", "CL", LangStandard::lang_opencl10) -- .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) -- .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) -- .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) -- .Cases("clc++", "CLC++", LangStandard::lang_openclcpp) -- .Default(LangStandard::lang_unspecified); -+ LangStandard::Kind OpenCLLangStd = -+ llvm::StringSwitch(A->getValue()) -+ .Cases("cl", "CL", LangStandard::lang_opencl10) -+ .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) -+ .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) -+ .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) -+ .Cases("cl3.0", "CL3.0", LangStandard::lang_opencl30) -+ .Cases("clc++", "CLC++", LangStandard::lang_openclcpp) -+ .Default(LangStandard::lang_unspecified); - - if (OpenCLLangStd == LangStandard::lang_unspecified) { - Diags.Report(diag::err_drv_invalid_value) -@@ -2787,8 +2790,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, - - Opts.RTTI = Opts.CPlusPlus && !Args.hasArg(OPT_fno_rtti); - Opts.RTTIData = Opts.RTTI && !Args.hasArg(OPT_fno_rtti_data); -- Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL -- && Opts.OpenCLVersion == 200); -+ Opts.Blocks = Args.hasArg(OPT_fblocks); - Opts.BlocksRuntimeOptional = Args.hasArg(OPT_fblocks_runtime_optional); - Opts.Coroutines = Opts.CPlusPlus2a || Args.hasArg(OPT_fcoroutines_ts); - -diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp -index c273cb96d9b9..aefd208e6cd3 100644 ---- a/clang/lib/Frontend/InitPreprocessor.cpp -+++ b/clang/lib/Frontend/InitPreprocessor.cpp -@@ -445,6 +445,9 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, - case 200: - Builder.defineMacro("__OPENCL_C_VERSION__", "200"); - break; -+ case 300: -+ Builder.defineMacro("__OPENCL_C_VERSION__", "300"); -+ break; - default: - llvm_unreachable("Unsupported OpenCL version"); - } -@@ -453,6 +456,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, - Builder.defineMacro("CL_VERSION_1_1", "110"); - Builder.defineMacro("CL_VERSION_1_2", "120"); - Builder.defineMacro("CL_VERSION_2_0", "200"); -+ Builder.defineMacro("CL_VERSION_3_0", "300"); - - if (TI.isLittleEndian()) - Builder.defineMacro("__ENDIAN_LITTLE__"); -@@ -1101,7 +1105,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, - // OpenCL definitions. - if (LangOpts.OpenCL) { - #define OPENCLEXT(Ext) \ -- if (TI.getSupportedOpenCLOpts().isSupported(#Ext, LangOpts)) \ -+ if (TI.getSupportedOpenCLOpts().isSupported(#Ext)) \ - Builder.defineMacro(#Ext); - #include "clang/Basic/OpenCLExtensions.def" - -diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h -index 430e07d36f62..2cc688ccc3da 100644 ---- a/clang/lib/Headers/opencl-c-base.h -+++ b/clang/lib/Headers/opencl-c-base.h -@@ -9,6 +9,59 @@ - #ifndef _OPENCL_BASE_H_ - #define _OPENCL_BASE_H_ - -+// Add predefined macros to build headers with standalone executable -+#ifndef CL_VERSION_3_0 -+ #define CL_VERSION_3_0 300 -+#endif -+ -+// Define features for 2.0 for header backward compatibility -+#ifndef __opencl_c_int64 -+ #define __opencl_c_int64 1 -+#endif -+#if __OPENCL_C_VERSION__ != CL_VERSION_3_0 -+ #ifndef __opencl_c_images -+ #define __opencl_c_images 1 -+ #endif -+#endif -+#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) -+#ifndef __opencl_c_pipes -+ #define __opencl_c_pipes 1 -+#endif -+#ifndef __opencl_c_generic_address_space -+ #define __opencl_c_generic_address_space 1 -+#endif -+#ifndef __opencl_c_work_group_collective_functions -+ #define __opencl_c_work_group_collective_functions 1 -+#endif -+#ifndef __opencl_c_atomic_order_acq_rel -+ #define __opencl_c_atomic_order_acq_rel 1 -+#endif -+#ifndef __opencl_c_atomic_order_seq_cst -+ #define __opencl_c_atomic_order_seq_cst 1 -+#endif -+#ifndef __opencl_c_atomic_scope_device -+ #define __opencl_c_atomic_scope_device 1 -+#endif -+#ifndef __opencl_c_atomic_scope_all_devices -+ #define __opencl_c_atomic_scope_all_devices 1 -+#endif -+#ifndef __opencl_c_subgroups -+ #define __opencl_c_subgroups 1 -+#endif -+#ifndef __opencl_c_3d_image_writes -+ #define __opencl_c_3d_image_writes 1 -+#endif -+#ifndef __opencl_c_device_enqueue -+ #define __opencl_c_device_enqueue 1 -+#endif -+#ifndef __opencl_c_read_write_images -+ #define __opencl_c_read_write_images 1 -+#endif -+#ifndef __opencl_c_program_scope_global_variables -+ #define __opencl_c_program_scope_global_variables 1 -+#endif -+#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) -+ - // built-in scalar data types: - - /** -@@ -115,7 +168,12 @@ typedef half half4 __attribute__((ext_vector_type(4))); - typedef half half8 __attribute__((ext_vector_type(8))); - typedef half half16 __attribute__((ext_vector_type(16))); - #endif --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+ -+#ifndef __opencl_c_fp64 -+ #define __opencl_c_fp64 1 -+#endif -+ - #if __OPENCL_C_VERSION__ < CL_VERSION_1_2 - #pragma OPENCL EXTENSION cl_khr_fp64 : enable - #endif -@@ -281,9 +339,17 @@ typedef uint cl_mem_fence_flags; - typedef enum memory_scope { - memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, - memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, -+#ifdef __opencl_c_atomic_scope_device - memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, -+#endif -+#ifdef __opencl_c_atomic_scope_all_devices -+ #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+ memory_scope_all_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, -+ #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) - memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, --#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) -+#endif -+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \ -+ defined(__opencl_c_subgroups) - memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP - #endif - } memory_scope; -@@ -301,13 +367,14 @@ typedef enum memory_scope { - #define ATOMIC_FLAG_INIT 0 - - // enum values aligned with what clang uses in EmitAtomicExpr() --typedef enum memory_order --{ -+typedef enum memory_order { - memory_order_relaxed = __ATOMIC_RELAXED, - memory_order_acquire = __ATOMIC_ACQUIRE, - memory_order_release = __ATOMIC_RELEASE, - memory_order_acq_rel = __ATOMIC_ACQ_REL, -+#ifdef __opencl_c_atomic_order_seq_cst - memory_order_seq_cst = __ATOMIC_SEQ_CST -+#endif //__opencl_c_atomic_order_seq_cst - } memory_order; - - #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h -index 3210f93cc851..93a946cec5b1 100644 ---- a/clang/lib/Headers/opencl-c.h -+++ b/clang/lib/Headers/opencl-c.h -@@ -35,7 +35,6 @@ - #define __purefn __attribute__((pure)) - #define __cnfn __attribute__((const)) - -- - // OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions - - char __ovld __cnfn convert_char_rte(char); -@@ -4632,7 +4631,7 @@ float16 __ovld __cnfn convert_float16(float16); - - // Conversions with double data type parameters or return value. - --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - char __ovld __cnfn convert_char(double); - char __ovld __cnfn convert_char_rte(double); - char __ovld __cnfn convert_char_rtn(double); -@@ -5452,7 +5451,7 @@ double16 __ovld __cnfn convert_double16_rtz(uchar16); - double16 __ovld __cnfn convert_double16_rtz(uint16); - double16 __ovld __cnfn convert_double16_rtz(ulong16); - double16 __ovld __cnfn convert_double16_rtz(ushort16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - #ifdef cl_khr_fp16 - // Convert half types to non-double types. -@@ -6270,7 +6269,7 @@ half16 __ovld __cnfn convert_half16_rtz(float16); - half16 __ovld __cnfn convert_half16_rtz(half16); - - // Convert half types to double types. --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn convert_double(half); - double __ovld __cnfn convert_double_rte(half); - double __ovld __cnfn convert_double_rtp(half); -@@ -6333,7 +6332,7 @@ half16 __ovld __cnfn convert_half16_rte(double16); - half16 __ovld __cnfn convert_half16_rtp(double16); - half16 __ovld __cnfn convert_half16_rtn(double16); - half16 __ovld __cnfn convert_half16_rtz(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - #endif // cl_khr_fp16 - -@@ -6404,14 +6403,14 @@ half16 __ovld __cnfn convert_half16_rtz(double16); - #define as_float8(x) __builtin_astype((x), float8) - #define as_float16(x) __builtin_astype((x), float16) - --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #define as_double(x) __builtin_astype((x), double) - #define as_double2(x) __builtin_astype((x), double2) - #define as_double3(x) __builtin_astype((x), double3) - #define as_double4(x) __builtin_astype((x), double4) - #define as_double8(x) __builtin_astype((x), double8) - #define as_double16(x) __builtin_astype((x), double16) --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - #ifdef cl_khr_fp16 - #define as_half(x) __builtin_astype((x), half) -@@ -6534,14 +6533,14 @@ float3 __ovld __cnfn acos(float3); - float4 __ovld __cnfn acos(float4); - float8 __ovld __cnfn acos(float8); - float16 __ovld __cnfn acos(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn acos(double); - double2 __ovld __cnfn acos(double2); - double3 __ovld __cnfn acos(double3); - double4 __ovld __cnfn acos(double4); - double8 __ovld __cnfn acos(double8); - double16 __ovld __cnfn acos(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn acos(half); - half2 __ovld __cnfn acos(half2); -@@ -6560,14 +6559,14 @@ float3 __ovld __cnfn acosh(float3); - float4 __ovld __cnfn acosh(float4); - float8 __ovld __cnfn acosh(float8); - float16 __ovld __cnfn acosh(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn acosh(double); - double2 __ovld __cnfn acosh(double2); - double3 __ovld __cnfn acosh(double3); - double4 __ovld __cnfn acosh(double4); - double8 __ovld __cnfn acosh(double8); - double16 __ovld __cnfn acosh(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn acosh(half); - half2 __ovld __cnfn acosh(half2); -@@ -6586,14 +6585,14 @@ float3 __ovld __cnfn acospi(float3 x); - float4 __ovld __cnfn acospi(float4 x); - float8 __ovld __cnfn acospi(float8 x); - float16 __ovld __cnfn acospi(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn acospi(double x); - double2 __ovld __cnfn acospi(double2 x); - double3 __ovld __cnfn acospi(double3 x); - double4 __ovld __cnfn acospi(double4 x); - double8 __ovld __cnfn acospi(double8 x); - double16 __ovld __cnfn acospi(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn acospi(half x); - half2 __ovld __cnfn acospi(half2 x); -@@ -6612,14 +6611,14 @@ float3 __ovld __cnfn asin(float3); - float4 __ovld __cnfn asin(float4); - float8 __ovld __cnfn asin(float8); - float16 __ovld __cnfn asin(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn asin(double); - double2 __ovld __cnfn asin(double2); - double3 __ovld __cnfn asin(double3); - double4 __ovld __cnfn asin(double4); - double8 __ovld __cnfn asin(double8); - double16 __ovld __cnfn asin(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn asin(half); - half2 __ovld __cnfn asin(half2); -@@ -6638,14 +6637,14 @@ float3 __ovld __cnfn asinh(float3); - float4 __ovld __cnfn asinh(float4); - float8 __ovld __cnfn asinh(float8); - float16 __ovld __cnfn asinh(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn asinh(double); - double2 __ovld __cnfn asinh(double2); - double3 __ovld __cnfn asinh(double3); - double4 __ovld __cnfn asinh(double4); - double8 __ovld __cnfn asinh(double8); - double16 __ovld __cnfn asinh(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn asinh(half); - half2 __ovld __cnfn asinh(half2); -@@ -6664,14 +6663,14 @@ float3 __ovld __cnfn asinpi(float3 x); - float4 __ovld __cnfn asinpi(float4 x); - float8 __ovld __cnfn asinpi(float8 x); - float16 __ovld __cnfn asinpi(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn asinpi(double x); - double2 __ovld __cnfn asinpi(double2 x); - double3 __ovld __cnfn asinpi(double3 x); - double4 __ovld __cnfn asinpi(double4 x); - double8 __ovld __cnfn asinpi(double8 x); - double16 __ovld __cnfn asinpi(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn asinpi(half x); - half2 __ovld __cnfn asinpi(half2 x); -@@ -6690,14 +6689,14 @@ float3 __ovld __cnfn atan(float3 y_over_x); - float4 __ovld __cnfn atan(float4 y_over_x); - float8 __ovld __cnfn atan(float8 y_over_x); - float16 __ovld __cnfn atan(float16 y_over_x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn atan(double y_over_x); - double2 __ovld __cnfn atan(double2 y_over_x); - double3 __ovld __cnfn atan(double3 y_over_x); - double4 __ovld __cnfn atan(double4 y_over_x); - double8 __ovld __cnfn atan(double8 y_over_x); - double16 __ovld __cnfn atan(double16 y_over_x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn atan(half y_over_x); - half2 __ovld __cnfn atan(half2 y_over_x); -@@ -6716,14 +6715,14 @@ float3 __ovld __cnfn atan2(float3 y, float3 x); - float4 __ovld __cnfn atan2(float4 y, float4 x); - float8 __ovld __cnfn atan2(float8 y, float8 x); - float16 __ovld __cnfn atan2(float16 y, float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn atan2(double y, double x); - double2 __ovld __cnfn atan2(double2 y, double2 x); - double3 __ovld __cnfn atan2(double3 y, double3 x); - double4 __ovld __cnfn atan2(double4 y, double4 x); - double8 __ovld __cnfn atan2(double8 y, double8 x); - double16 __ovld __cnfn atan2(double16 y, double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn atan2(half y, half x); - half2 __ovld __cnfn atan2(half2 y, half2 x); -@@ -6742,14 +6741,14 @@ float3 __ovld __cnfn atanh(float3); - float4 __ovld __cnfn atanh(float4); - float8 __ovld __cnfn atanh(float8); - float16 __ovld __cnfn atanh(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn atanh(double); - double2 __ovld __cnfn atanh(double2); - double3 __ovld __cnfn atanh(double3); - double4 __ovld __cnfn atanh(double4); - double8 __ovld __cnfn atanh(double8); - double16 __ovld __cnfn atanh(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn atanh(half); - half2 __ovld __cnfn atanh(half2); -@@ -6768,14 +6767,14 @@ float3 __ovld __cnfn atanpi(float3 x); - float4 __ovld __cnfn atanpi(float4 x); - float8 __ovld __cnfn atanpi(float8 x); - float16 __ovld __cnfn atanpi(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn atanpi(double x); - double2 __ovld __cnfn atanpi(double2 x); - double3 __ovld __cnfn atanpi(double3 x); - double4 __ovld __cnfn atanpi(double4 x); - double8 __ovld __cnfn atanpi(double8 x); - double16 __ovld __cnfn atanpi(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn atanpi(half x); - half2 __ovld __cnfn atanpi(half2 x); -@@ -6794,14 +6793,14 @@ float3 __ovld __cnfn atan2pi(float3 y, float3 x); - float4 __ovld __cnfn atan2pi(float4 y, float4 x); - float8 __ovld __cnfn atan2pi(float8 y, float8 x); - float16 __ovld __cnfn atan2pi(float16 y, float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn atan2pi(double y, double x); - double2 __ovld __cnfn atan2pi(double2 y, double2 x); - double3 __ovld __cnfn atan2pi(double3 y, double3 x); - double4 __ovld __cnfn atan2pi(double4 y, double4 x); - double8 __ovld __cnfn atan2pi(double8 y, double8 x); - double16 __ovld __cnfn atan2pi(double16 y, double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn atan2pi(half y, half x); - half2 __ovld __cnfn atan2pi(half2 y, half2 x); -@@ -6820,14 +6819,14 @@ float3 __ovld __cnfn cbrt(float3); - float4 __ovld __cnfn cbrt(float4); - float8 __ovld __cnfn cbrt(float8); - float16 __ovld __cnfn cbrt(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn cbrt(double); - double2 __ovld __cnfn cbrt(double2); - double3 __ovld __cnfn cbrt(double3); - double4 __ovld __cnfn cbrt(double4); - double8 __ovld __cnfn cbrt(double8); - double16 __ovld __cnfn cbrt(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn cbrt(half); - half2 __ovld __cnfn cbrt(half2); -@@ -6847,14 +6846,14 @@ float3 __ovld __cnfn ceil(float3); - float4 __ovld __cnfn ceil(float4); - float8 __ovld __cnfn ceil(float8); - float16 __ovld __cnfn ceil(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn ceil(double); - double2 __ovld __cnfn ceil(double2); - double3 __ovld __cnfn ceil(double3); - double4 __ovld __cnfn ceil(double4); - double8 __ovld __cnfn ceil(double8); - double16 __ovld __cnfn ceil(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn ceil(half); - half2 __ovld __cnfn ceil(half2); -@@ -6873,14 +6872,14 @@ float3 __ovld __cnfn copysign(float3 x, float3 y); - float4 __ovld __cnfn copysign(float4 x, float4 y); - float8 __ovld __cnfn copysign(float8 x, float8 y); - float16 __ovld __cnfn copysign(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn copysign(double x, double y); - double2 __ovld __cnfn copysign(double2 x, double2 y); - double3 __ovld __cnfn copysign(double3 x, double3 y); - double4 __ovld __cnfn copysign(double4 x, double4 y); - double8 __ovld __cnfn copysign(double8 x, double8 y); - double16 __ovld __cnfn copysign(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn copysign(half x, half y); - half2 __ovld __cnfn copysign(half2 x, half2 y); -@@ -6899,14 +6898,14 @@ float3 __ovld __cnfn cos(float3); - float4 __ovld __cnfn cos(float4); - float8 __ovld __cnfn cos(float8); - float16 __ovld __cnfn cos(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn cos(double); - double2 __ovld __cnfn cos(double2); - double3 __ovld __cnfn cos(double3); - double4 __ovld __cnfn cos(double4); - double8 __ovld __cnfn cos(double8); - double16 __ovld __cnfn cos(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn cos(half); - half2 __ovld __cnfn cos(half2); -@@ -6925,14 +6924,14 @@ float3 __ovld __cnfn cosh(float3); - float4 __ovld __cnfn cosh(float4); - float8 __ovld __cnfn cosh(float8); - float16 __ovld __cnfn cosh(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn cosh(double); - double2 __ovld __cnfn cosh(double2); - double3 __ovld __cnfn cosh(double3); - double4 __ovld __cnfn cosh(double4); - double8 __ovld __cnfn cosh(double8); - double16 __ovld __cnfn cosh(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn cosh(half); - half2 __ovld __cnfn cosh(half2); -@@ -6951,14 +6950,14 @@ float3 __ovld __cnfn cospi(float3 x); - float4 __ovld __cnfn cospi(float4 x); - float8 __ovld __cnfn cospi(float8 x); - float16 __ovld __cnfn cospi(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn cospi(double x); - double2 __ovld __cnfn cospi(double2 x); - double3 __ovld __cnfn cospi(double3 x); - double4 __ovld __cnfn cospi(double4 x); - double8 __ovld __cnfn cospi(double8 x); - double16 __ovld __cnfn cospi(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn cospi(half x); - half2 __ovld __cnfn cospi(half2 x); -@@ -6977,14 +6976,14 @@ float3 __ovld __cnfn erfc(float3); - float4 __ovld __cnfn erfc(float4); - float8 __ovld __cnfn erfc(float8); - float16 __ovld __cnfn erfc(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn erfc(double); - double2 __ovld __cnfn erfc(double2); - double3 __ovld __cnfn erfc(double3); - double4 __ovld __cnfn erfc(double4); - double8 __ovld __cnfn erfc(double8); - double16 __ovld __cnfn erfc(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn erfc(half); - half2 __ovld __cnfn erfc(half2); -@@ -7004,14 +7003,14 @@ float3 __ovld __cnfn erf(float3); - float4 __ovld __cnfn erf(float4); - float8 __ovld __cnfn erf(float8); - float16 __ovld __cnfn erf(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn erf(double); - double2 __ovld __cnfn erf(double2); - double3 __ovld __cnfn erf(double3); - double4 __ovld __cnfn erf(double4); - double8 __ovld __cnfn erf(double8); - double16 __ovld __cnfn erf(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn erf(half); - half2 __ovld __cnfn erf(half2); -@@ -7030,14 +7029,14 @@ float3 __ovld __cnfn exp(float3 x); - float4 __ovld __cnfn exp(float4 x); - float8 __ovld __cnfn exp(float8 x); - float16 __ovld __cnfn exp(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn exp(double x); - double2 __ovld __cnfn exp(double2 x); - double3 __ovld __cnfn exp(double3 x); - double4 __ovld __cnfn exp(double4 x); - double8 __ovld __cnfn exp(double8 x); - double16 __ovld __cnfn exp(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn exp(half x); - half2 __ovld __cnfn exp(half2 x); -@@ -7056,14 +7055,14 @@ float3 __ovld __cnfn exp2(float3); - float4 __ovld __cnfn exp2(float4); - float8 __ovld __cnfn exp2(float8); - float16 __ovld __cnfn exp2(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn exp2(double); - double2 __ovld __cnfn exp2(double2); - double3 __ovld __cnfn exp2(double3); - double4 __ovld __cnfn exp2(double4); - double8 __ovld __cnfn exp2(double8); - double16 __ovld __cnfn exp2(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn exp2(half); - half2 __ovld __cnfn exp2(half2); -@@ -7082,14 +7081,14 @@ float3 __ovld __cnfn exp10(float3); - float4 __ovld __cnfn exp10(float4); - float8 __ovld __cnfn exp10(float8); - float16 __ovld __cnfn exp10(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn exp10(double); - double2 __ovld __cnfn exp10(double2); - double3 __ovld __cnfn exp10(double3); - double4 __ovld __cnfn exp10(double4); - double8 __ovld __cnfn exp10(double8); - double16 __ovld __cnfn exp10(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn exp10(half); - half2 __ovld __cnfn exp10(half2); -@@ -7108,14 +7107,14 @@ float3 __ovld __cnfn expm1(float3 x); - float4 __ovld __cnfn expm1(float4 x); - float8 __ovld __cnfn expm1(float8 x); - float16 __ovld __cnfn expm1(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn expm1(double x); - double2 __ovld __cnfn expm1(double2 x); - double3 __ovld __cnfn expm1(double3 x); - double4 __ovld __cnfn expm1(double4 x); - double8 __ovld __cnfn expm1(double8 x); - double16 __ovld __cnfn expm1(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn expm1(half x); - half2 __ovld __cnfn expm1(half2 x); -@@ -7134,14 +7133,14 @@ float3 __ovld __cnfn fabs(float3); - float4 __ovld __cnfn fabs(float4); - float8 __ovld __cnfn fabs(float8); - float16 __ovld __cnfn fabs(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn fabs(double); - double2 __ovld __cnfn fabs(double2); - double3 __ovld __cnfn fabs(double3); - double4 __ovld __cnfn fabs(double4); - double8 __ovld __cnfn fabs(double8); - double16 __ovld __cnfn fabs(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn fabs(half); - half2 __ovld __cnfn fabs(half2); -@@ -7160,14 +7159,14 @@ float3 __ovld __cnfn fdim(float3 x, float3 y); - float4 __ovld __cnfn fdim(float4 x, float4 y); - float8 __ovld __cnfn fdim(float8 x, float8 y); - float16 __ovld __cnfn fdim(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn fdim(double x, double y); - double2 __ovld __cnfn fdim(double2 x, double2 y); - double3 __ovld __cnfn fdim(double3 x, double3 y); - double4 __ovld __cnfn fdim(double4 x, double4 y); - double8 __ovld __cnfn fdim(double8 x, double8 y); - double16 __ovld __cnfn fdim(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn fdim(half x, half y); - half2 __ovld __cnfn fdim(half2 x, half2 y); -@@ -7187,14 +7186,14 @@ float3 __ovld __cnfn floor(float3); - float4 __ovld __cnfn floor(float4); - float8 __ovld __cnfn floor(float8); - float16 __ovld __cnfn floor(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn floor(double); - double2 __ovld __cnfn floor(double2); - double3 __ovld __cnfn floor(double3); - double4 __ovld __cnfn floor(double4); - double8 __ovld __cnfn floor(double8); - double16 __ovld __cnfn floor(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn floor(half); - half2 __ovld __cnfn floor(half2); -@@ -7217,14 +7216,14 @@ float3 __ovld __cnfn fma(float3 a, float3 b, float3 c); - float4 __ovld __cnfn fma(float4 a, float4 b, float4 c); - float8 __ovld __cnfn fma(float8 a, float8 b, float8 c); - float16 __ovld __cnfn fma(float16 a, float16 b, float16 c); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn fma(double a, double b, double c); - double2 __ovld __cnfn fma(double2 a, double2 b, double2 c); - double3 __ovld __cnfn fma(double3 a, double3 b, double3 c); - double4 __ovld __cnfn fma(double4 a, double4 b, double4 c); - double8 __ovld __cnfn fma(double8 a, double8 b, double8 c); - double16 __ovld __cnfn fma(double16 a, double16 b, double16 c); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn fma(half a, half b, half c); - half2 __ovld __cnfn fma(half2 a, half2 b, half2 c); -@@ -7251,7 +7250,7 @@ float3 __ovld __cnfn fmax(float3 x, float y); - float4 __ovld __cnfn fmax(float4 x, float y); - float8 __ovld __cnfn fmax(float8 x, float y); - float16 __ovld __cnfn fmax(float16 x, float y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn fmax(double x, double y); - double2 __ovld __cnfn fmax(double2 x, double2 y); - double3 __ovld __cnfn fmax(double3 x, double3 y); -@@ -7263,7 +7262,7 @@ double3 __ovld __cnfn fmax(double3 x, double y); - double4 __ovld __cnfn fmax(double4 x, double y); - double8 __ovld __cnfn fmax(double8 x, double y); - double16 __ovld __cnfn fmax(double16 x, double y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn fmax(half x, half y); - half2 __ovld __cnfn fmax(half2 x, half2 y); -@@ -7295,7 +7294,7 @@ float3 __ovld __cnfn fmin(float3 x, float y); - float4 __ovld __cnfn fmin(float4 x, float y); - float8 __ovld __cnfn fmin(float8 x, float y); - float16 __ovld __cnfn fmin(float16 x, float y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn fmin(double x, double y); - double2 __ovld __cnfn fmin(double2 x, double2 y); - double3 __ovld __cnfn fmin(double3 x, double3 y); -@@ -7307,7 +7306,7 @@ double3 __ovld __cnfn fmin(double3 x, double y); - double4 __ovld __cnfn fmin(double4 x, double y); - double8 __ovld __cnfn fmin(double8 x, double y); - double16 __ovld __cnfn fmin(double16 x, double y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn fmin(half x, half y); - half2 __ovld __cnfn fmin(half2 x, half2 y); -@@ -7331,14 +7330,14 @@ float3 __ovld __cnfn fmod(float3 x, float3 y); - float4 __ovld __cnfn fmod(float4 x, float4 y); - float8 __ovld __cnfn fmod(float8 x, float8 y); - float16 __ovld __cnfn fmod(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn fmod(double x, double y); - double2 __ovld __cnfn fmod(double2 x, double2 y); - double3 __ovld __cnfn fmod(double3 x, double3 y); - double4 __ovld __cnfn fmod(double4 x, double4 y); - double8 __ovld __cnfn fmod(double8 x, double8 y); - double16 __ovld __cnfn fmod(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn fmod(half x, half y); - half2 __ovld __cnfn fmod(half2 x, half2 y); -@@ -7352,21 +7351,21 @@ half16 __ovld __cnfn fmod(half16 x, half16 y); - * Returns fmin(x - floor (x), 0x1.fffffep-1f ). - * floor(x) is returned in iptr. - */ --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - float __ovld fract(float x, float *iptr); - float2 __ovld fract(float2 x, float2 *iptr); - float3 __ovld fract(float3 x, float3 *iptr); - float4 __ovld fract(float4 x, float4 *iptr); - float8 __ovld fract(float8 x, float8 *iptr); - float16 __ovld fract(float16 x, float16 *iptr); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld fract(double x, double *iptr); - double2 __ovld fract(double2 x, double2 *iptr); - double3 __ovld fract(double3 x, double3 *iptr); - double4 __ovld fract(double4 x, double4 *iptr); - double8 __ovld fract(double8 x, double8 *iptr); - double16 __ovld fract(double16 x, double16 *iptr); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld fract(half x, half *iptr); - half2 __ovld fract(half2 x, half2 *iptr); -@@ -7375,7 +7374,9 @@ half4 __ovld fract(half4 x, half4 *iptr); - half8 __ovld fract(half8 x, half8 *iptr); - half16 __ovld fract(half16 x, half16 *iptr); - #endif //cl_khr_fp16 --#else -+#endif //__opencl_c_generic_address_space -+ -+#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) - float __ovld fract(float x, __global float *iptr); - float2 __ovld fract(float2 x, __global float2 *iptr); - float3 __ovld fract(float3 x, __global float3 *iptr); -@@ -7394,7 +7395,7 @@ float3 __ovld fract(float3 x, __private float3 *iptr); - float4 __ovld fract(float4 x, __private float4 *iptr); - float8 __ovld fract(float8 x, __private float8 *iptr); - float16 __ovld fract(float16 x, __private float16 *iptr); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld fract(double x, __global double *iptr); - double2 __ovld fract(double2 x, __global double2 *iptr); - double3 __ovld fract(double3 x, __global double3 *iptr); -@@ -7413,7 +7414,7 @@ double3 __ovld fract(double3 x, __private double3 *iptr); - double4 __ovld fract(double4 x, __private double4 *iptr); - double8 __ovld fract(double8 x, __private double8 *iptr); - double16 __ovld fract(double16 x, __private double16 *iptr); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld fract(half x, __global half *iptr); - half2 __ovld fract(half2 x, __global half2 *iptr); -@@ -7434,29 +7435,29 @@ half4 __ovld fract(half4 x, __private half4 *iptr); - half8 __ovld fract(half8 x, __private half8 *iptr); - half16 __ovld fract(half16 x, __private half16 *iptr); - #endif //cl_khr_fp16 --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -- -+#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != -+ //! CL_VERSION_2_0) - /** - * Extract mantissa and exponent from x. For each - * component the mantissa returned is a float with - * magnitude in the interval [1/2, 1) or 0. Each - * component of x equals mantissa returned * 2^exp. - */ --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - float __ovld frexp(float x, int *exp); - float2 __ovld frexp(float2 x, int2 *exp); - float3 __ovld frexp(float3 x, int3 *exp); - float4 __ovld frexp(float4 x, int4 *exp); - float8 __ovld frexp(float8 x, int8 *exp); - float16 __ovld frexp(float16 x, int16 *exp); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld frexp(double x, int *exp); - double2 __ovld frexp(double2 x, int2 *exp); - double3 __ovld frexp(double3 x, int3 *exp); - double4 __ovld frexp(double4 x, int4 *exp); - double8 __ovld frexp(double8 x, int8 *exp); - double16 __ovld frexp(double16 x, int16 *exp); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld frexp(half x, int *exp); - half2 __ovld frexp(half2 x, int2 *exp); -@@ -7465,7 +7466,9 @@ half4 __ovld frexp(half4 x, int4 *exp); - half8 __ovld frexp(half8 x, int8 *exp); - half16 __ovld frexp(half16 x, int16 *exp); - #endif //cl_khr_fp16 --#else -+#endif //__opencl_c_generic_address_space -+ -+#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) - float __ovld frexp(float x, __global int *exp); - float2 __ovld frexp(float2 x, __global int2 *exp); - float3 __ovld frexp(float3 x, __global int3 *exp); -@@ -7484,7 +7487,7 @@ float3 __ovld frexp(float3 x, __private int3 *exp); - float4 __ovld frexp(float4 x, __private int4 *exp); - float8 __ovld frexp(float8 x, __private int8 *exp); - float16 __ovld frexp(float16 x, __private int16 *exp); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld frexp(double x, __global int *exp); - double2 __ovld frexp(double2 x, __global int2 *exp); - double3 __ovld frexp(double3 x, __global int3 *exp); -@@ -7503,7 +7506,7 @@ double3 __ovld frexp(double3 x, __private int3 *exp); - double4 __ovld frexp(double4 x, __private int4 *exp); - double8 __ovld frexp(double8 x, __private int8 *exp); - double16 __ovld frexp(double16 x, __private int16 *exp); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld frexp(half x, __global int *exp); - half2 __ovld frexp(half2 x, __global int2 *exp); -@@ -7524,7 +7527,8 @@ half4 __ovld frexp(half4 x, __private int4 *exp); - half8 __ovld frexp(half8 x, __private int8 *exp); - half16 __ovld frexp(half16 x, __private int16 *exp); - #endif //cl_khr_fp16 --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != -+ //! CL_VERSION_2_0) - - /** - * Compute the value of the square root of x^2 + y^2 -@@ -7536,14 +7540,14 @@ float3 __ovld __cnfn hypot(float3 x, float3 y); - float4 __ovld __cnfn hypot(float4 x, float4 y); - float8 __ovld __cnfn hypot(float8 x, float8 y); - float16 __ovld __cnfn hypot(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn hypot(double x, double y); - double2 __ovld __cnfn hypot(double2 x, double2 y); - double3 __ovld __cnfn hypot(double3 x, double3 y); - double4 __ovld __cnfn hypot(double4 x, double4 y); - double8 __ovld __cnfn hypot(double8 x, double8 y); - double16 __ovld __cnfn hypot(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn hypot(half x, half y); - half2 __ovld __cnfn hypot(half2 x, half2 y); -@@ -7562,14 +7566,14 @@ int3 __ovld __cnfn ilogb(float3 x); - int4 __ovld __cnfn ilogb(float4 x); - int8 __ovld __cnfn ilogb(float8 x); - int16 __ovld __cnfn ilogb(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn ilogb(double x); - int2 __ovld __cnfn ilogb(double2 x); - int3 __ovld __cnfn ilogb(double3 x); - int4 __ovld __cnfn ilogb(double4 x); - int8 __ovld __cnfn ilogb(double8 x); - int16 __ovld __cnfn ilogb(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn ilogb(half x); - int2 __ovld __cnfn ilogb(half2 x); -@@ -7593,7 +7597,7 @@ float3 __ovld __cnfn ldexp(float3 x, int n); - float4 __ovld __cnfn ldexp(float4 x, int n); - float8 __ovld __cnfn ldexp(float8 x, int n); - float16 __ovld __cnfn ldexp(float16 x, int n); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn ldexp(double x, int n); - double2 __ovld __cnfn ldexp(double2 x, int2 n); - double3 __ovld __cnfn ldexp(double3 x, int3 n); -@@ -7605,7 +7609,7 @@ double3 __ovld __cnfn ldexp(double3 x, int n); - double4 __ovld __cnfn ldexp(double4 x, int n); - double8 __ovld __cnfn ldexp(double8 x, int n); - double16 __ovld __cnfn ldexp(double16 x, int n); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn ldexp(half x, int n); - half2 __ovld __cnfn ldexp(half2 x, int2 n); -@@ -7632,14 +7636,14 @@ float3 __ovld __cnfn lgamma(float3 x); - float4 __ovld __cnfn lgamma(float4 x); - float8 __ovld __cnfn lgamma(float8 x); - float16 __ovld __cnfn lgamma(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn lgamma(double x); - double2 __ovld __cnfn lgamma(double2 x); - double3 __ovld __cnfn lgamma(double3 x); - double4 __ovld __cnfn lgamma(double4 x); - double8 __ovld __cnfn lgamma(double8 x); - double16 __ovld __cnfn lgamma(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn lgamma(half x); - half2 __ovld __cnfn lgamma(half2 x); -@@ -7649,21 +7653,21 @@ half8 __ovld __cnfn lgamma(half8 x); - half16 __ovld __cnfn lgamma(half16 x); - #endif //cl_khr_fp16 - --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - float __ovld lgamma_r(float x, int *signp); - float2 __ovld lgamma_r(float2 x, int2 *signp); - float3 __ovld lgamma_r(float3 x, int3 *signp); - float4 __ovld lgamma_r(float4 x, int4 *signp); - float8 __ovld lgamma_r(float8 x, int8 *signp); - float16 __ovld lgamma_r(float16 x, int16 *signp); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld lgamma_r(double x, int *signp); - double2 __ovld lgamma_r(double2 x, int2 *signp); - double3 __ovld lgamma_r(double3 x, int3 *signp); - double4 __ovld lgamma_r(double4 x, int4 *signp); - double8 __ovld lgamma_r(double8 x, int8 *signp); - double16 __ovld lgamma_r(double16 x, int16 *signp); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld lgamma_r(half x, int *signp); - half2 __ovld lgamma_r(half2 x, int2 *signp); -@@ -7672,7 +7676,9 @@ half4 __ovld lgamma_r(half4 x, int4 *signp); - half8 __ovld lgamma_r(half8 x, int8 *signp); - half16 __ovld lgamma_r(half16 x, int16 *signp); - #endif //cl_khr_fp16 --#else -+#endif //__opencl_c_generic_address_space -+ -+#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) - float __ovld lgamma_r(float x, __global int *signp); - float2 __ovld lgamma_r(float2 x, __global int2 *signp); - float3 __ovld lgamma_r(float3 x, __global int3 *signp); -@@ -7691,7 +7697,7 @@ float3 __ovld lgamma_r(float3 x, __private int3 *signp); - float4 __ovld lgamma_r(float4 x, __private int4 *signp); - float8 __ovld lgamma_r(float8 x, __private int8 *signp); - float16 __ovld lgamma_r(float16 x, __private int16 *signp); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld lgamma_r(double x, __global int *signp); - double2 __ovld lgamma_r(double2 x, __global int2 *signp); - double3 __ovld lgamma_r(double3 x, __global int3 *signp); -@@ -7710,7 +7716,7 @@ double3 __ovld lgamma_r(double3 x, __private int3 *signp); - double4 __ovld lgamma_r(double4 x, __private int4 *signp); - double8 __ovld lgamma_r(double8 x, __private int8 *signp); - double16 __ovld lgamma_r(double16 x, __private int16 *signp); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld lgamma_r(half x, __global int *signp); - half2 __ovld lgamma_r(half2 x, __global int2 *signp); -@@ -7731,8 +7737,8 @@ half4 __ovld lgamma_r(half4 x, __private int4 *signp); - half8 __ovld lgamma_r(half8 x, __private int8 *signp); - half16 __ovld lgamma_r(half16 x, __private int16 *signp); - #endif //cl_khr_fp16 --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -- -+#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != -+ //! CL_VERSION_2_0) - /** - * Compute natural logarithm. - */ -@@ -7742,14 +7748,14 @@ float3 __ovld __cnfn log(float3); - float4 __ovld __cnfn log(float4); - float8 __ovld __cnfn log(float8); - float16 __ovld __cnfn log(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn log(double); - double2 __ovld __cnfn log(double2); - double3 __ovld __cnfn log(double3); - double4 __ovld __cnfn log(double4); - double8 __ovld __cnfn log(double8); - double16 __ovld __cnfn log(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn log(half); - half2 __ovld __cnfn log(half2); -@@ -7760,7 +7766,7 @@ half16 __ovld __cnfn log(half16); - #endif //cl_khr_fp16 - - /** -- * Compute a base 2 logarithm. -+ * Compute a base 2 logarithm - */ - float __ovld __cnfn log2(float); - float2 __ovld __cnfn log2(float2); -@@ -7768,14 +7774,14 @@ float3 __ovld __cnfn log2(float3); - float4 __ovld __cnfn log2(float4); - float8 __ovld __cnfn log2(float8); - float16 __ovld __cnfn log2(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn log2(double); - double2 __ovld __cnfn log2(double2); - double3 __ovld __cnfn log2(double3); - double4 __ovld __cnfn log2(double4); - double8 __ovld __cnfn log2(double8); - double16 __ovld __cnfn log2(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn log2(half); - half2 __ovld __cnfn log2(half2); -@@ -7794,14 +7800,14 @@ float3 __ovld __cnfn log10(float3); - float4 __ovld __cnfn log10(float4); - float8 __ovld __cnfn log10(float8); - float16 __ovld __cnfn log10(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn log10(double); - double2 __ovld __cnfn log10(double2); - double3 __ovld __cnfn log10(double3); - double4 __ovld __cnfn log10(double4); - double8 __ovld __cnfn log10(double8); - double16 __ovld __cnfn log10(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn log10(half); - half2 __ovld __cnfn log10(half2); -@@ -7820,14 +7826,14 @@ float3 __ovld __cnfn log1p(float3 x); - float4 __ovld __cnfn log1p(float4 x); - float8 __ovld __cnfn log1p(float8 x); - float16 __ovld __cnfn log1p(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn log1p(double x); - double2 __ovld __cnfn log1p(double2 x); - double3 __ovld __cnfn log1p(double3 x); - double4 __ovld __cnfn log1p(double4 x); - double8 __ovld __cnfn log1p(double8 x); - double16 __ovld __cnfn log1p(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn log1p(half x); - half2 __ovld __cnfn log1p(half2 x); -@@ -7847,14 +7853,14 @@ float3 __ovld __cnfn logb(float3 x); - float4 __ovld __cnfn logb(float4 x); - float8 __ovld __cnfn logb(float8 x); - float16 __ovld __cnfn logb(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn logb(double x); - double2 __ovld __cnfn logb(double2 x); - double3 __ovld __cnfn logb(double3 x); - double4 __ovld __cnfn logb(double4 x); - double8 __ovld __cnfn logb(double8 x); - double16 __ovld __cnfn logb(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn logb(half x); - half2 __ovld __cnfn logb(half2 x); -@@ -7877,14 +7883,14 @@ float3 __ovld __cnfn mad(float3 a, float3 b, float3 c); - float4 __ovld __cnfn mad(float4 a, float4 b, float4 c); - float8 __ovld __cnfn mad(float8 a, float8 b, float8 c); - float16 __ovld __cnfn mad(float16 a, float16 b, float16 c); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn mad(double a, double b, double c); - double2 __ovld __cnfn mad(double2 a, double2 b, double2 c); - double3 __ovld __cnfn mad(double3 a, double3 b, double3 c); - double4 __ovld __cnfn mad(double4 a, double4 b, double4 c); - double8 __ovld __cnfn mad(double8 a, double8 b, double8 c); - double16 __ovld __cnfn mad(double16 a, double16 b, double16 c); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn mad(half a, half b, half c); - half2 __ovld __cnfn mad(half2 a, half2 b, half2 c); -@@ -7904,14 +7910,14 @@ float3 __ovld __cnfn maxmag(float3 x, float3 y); - float4 __ovld __cnfn maxmag(float4 x, float4 y); - float8 __ovld __cnfn maxmag(float8 x, float8 y); - float16 __ovld __cnfn maxmag(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn maxmag(double x, double y); - double2 __ovld __cnfn maxmag(double2 x, double2 y); - double3 __ovld __cnfn maxmag(double3 x, double3 y); - double4 __ovld __cnfn maxmag(double4 x, double4 y); - double8 __ovld __cnfn maxmag(double8 x, double8 y); - double16 __ovld __cnfn maxmag(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn maxmag(half x, half y); - half2 __ovld __cnfn maxmag(half2 x, half2 y); -@@ -7931,14 +7937,14 @@ float3 __ovld __cnfn minmag(float3 x, float3 y); - float4 __ovld __cnfn minmag(float4 x, float4 y); - float8 __ovld __cnfn minmag(float8 x, float8 y); - float16 __ovld __cnfn minmag(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn minmag(double x, double y); - double2 __ovld __cnfn minmag(double2 x, double2 y); - double3 __ovld __cnfn minmag(double3 x, double3 y); - double4 __ovld __cnfn minmag(double4 x, double4 y); - double8 __ovld __cnfn minmag(double8 x, double8 y); - double16 __ovld __cnfn minmag(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn minmag(half x, half y); - half2 __ovld __cnfn minmag(half2 x, half2 y); -@@ -7955,21 +7961,21 @@ half16 __ovld __cnfn minmag(half16 x, half16 y); - * the argument. It stores the integral part in the object - * pointed to by iptr. - */ --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - float __ovld modf(float x, float *iptr); - float2 __ovld modf(float2 x, float2 *iptr); - float3 __ovld modf(float3 x, float3 *iptr); - float4 __ovld modf(float4 x, float4 *iptr); - float8 __ovld modf(float8 x, float8 *iptr); - float16 __ovld modf(float16 x, float16 *iptr); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld modf(double x, double *iptr); - double2 __ovld modf(double2 x, double2 *iptr); - double3 __ovld modf(double3 x, double3 *iptr); - double4 __ovld modf(double4 x, double4 *iptr); - double8 __ovld modf(double8 x, double8 *iptr); - double16 __ovld modf(double16 x, double16 *iptr); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld modf(half x, half *iptr); - half2 __ovld modf(half2 x, half2 *iptr); -@@ -7978,7 +7984,9 @@ half4 __ovld modf(half4 x, half4 *iptr); - half8 __ovld modf(half8 x, half8 *iptr); - half16 __ovld modf(half16 x, half16 *iptr); - #endif //cl_khr_fp16 --#else -+#endif //__opencl_c_generic_address_space -+ -+#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) - float __ovld modf(float x, __global float *iptr); - float2 __ovld modf(float2 x, __global float2 *iptr); - float3 __ovld modf(float3 x, __global float3 *iptr); -@@ -7997,7 +8005,7 @@ float3 __ovld modf(float3 x, __private float3 *iptr); - float4 __ovld modf(float4 x, __private float4 *iptr); - float8 __ovld modf(float8 x, __private float8 *iptr); - float16 __ovld modf(float16 x, __private float16 *iptr); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld modf(double x, __global double *iptr); - double2 __ovld modf(double2 x, __global double2 *iptr); - double3 __ovld modf(double3 x, __global double3 *iptr); -@@ -8016,7 +8024,7 @@ double3 __ovld modf(double3 x, __private double3 *iptr); - double4 __ovld modf(double4 x, __private double4 *iptr); - double8 __ovld modf(double8 x, __private double8 *iptr); - double16 __ovld modf(double16 x, __private double16 *iptr); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld modf(half x, __global half *iptr); - half2 __ovld modf(half2 x, __global half2 *iptr); -@@ -8037,7 +8045,8 @@ half4 __ovld modf(half4 x, __private half4 *iptr); - half8 __ovld modf(half8 x, __private half8 *iptr); - half16 __ovld modf(half16 x, __private half16 *iptr); - #endif //cl_khr_fp16 --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != -+ //! CL_VERSION_2_0) - - /** - * Returns a quiet NaN. The nancode may be placed -@@ -8049,14 +8058,14 @@ float3 __ovld __cnfn nan(uint3 nancode); - float4 __ovld __cnfn nan(uint4 nancode); - float8 __ovld __cnfn nan(uint8 nancode); - float16 __ovld __cnfn nan(uint16 nancode); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn nan(ulong nancode); - double2 __ovld __cnfn nan(ulong2 nancode); - double3 __ovld __cnfn nan(ulong3 nancode); - double4 __ovld __cnfn nan(ulong4 nancode); - double8 __ovld __cnfn nan(ulong8 nancode); - double16 __ovld __cnfn nan(ulong16 nancode); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn nan(ushort nancode); - half2 __ovld __cnfn nan(ushort2 nancode); -@@ -8079,14 +8088,14 @@ float3 __ovld __cnfn nextafter(float3 x, float3 y); - float4 __ovld __cnfn nextafter(float4 x, float4 y); - float8 __ovld __cnfn nextafter(float8 x, float8 y); - float16 __ovld __cnfn nextafter(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn nextafter(double x, double y); - double2 __ovld __cnfn nextafter(double2 x, double2 y); - double3 __ovld __cnfn nextafter(double3 x, double3 y); - double4 __ovld __cnfn nextafter(double4 x, double4 y); - double8 __ovld __cnfn nextafter(double8 x, double8 y); - double16 __ovld __cnfn nextafter(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn nextafter(half x, half y); - half2 __ovld __cnfn nextafter(half2 x, half2 y); -@@ -8105,14 +8114,14 @@ float3 __ovld __cnfn pow(float3 x, float3 y); - float4 __ovld __cnfn pow(float4 x, float4 y); - float8 __ovld __cnfn pow(float8 x, float8 y); - float16 __ovld __cnfn pow(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn pow(double x, double y); - double2 __ovld __cnfn pow(double2 x, double2 y); - double3 __ovld __cnfn pow(double3 x, double3 y); - double4 __ovld __cnfn pow(double4 x, double4 y); - double8 __ovld __cnfn pow(double8 x, double8 y); - double16 __ovld __cnfn pow(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn pow(half x, half y); - half2 __ovld __cnfn pow(half2 x, half2 y); -@@ -8131,14 +8140,14 @@ float3 __ovld __cnfn pown(float3 x, int3 y); - float4 __ovld __cnfn pown(float4 x, int4 y); - float8 __ovld __cnfn pown(float8 x, int8 y); - float16 __ovld __cnfn pown(float16 x, int16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn pown(double x, int y); - double2 __ovld __cnfn pown(double2 x, int2 y); - double3 __ovld __cnfn pown(double3 x, int3 y); - double4 __ovld __cnfn pown(double4 x, int4 y); - double8 __ovld __cnfn pown(double8 x, int8 y); - double16 __ovld __cnfn pown(double16 x, int16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn pown(half x, int y); - half2 __ovld __cnfn pown(half2 x, int2 y); -@@ -8157,14 +8166,14 @@ float3 __ovld __cnfn powr(float3 x, float3 y); - float4 __ovld __cnfn powr(float4 x, float4 y); - float8 __ovld __cnfn powr(float8 x, float8 y); - float16 __ovld __cnfn powr(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn powr(double x, double y); - double2 __ovld __cnfn powr(double2 x, double2 y); - double3 __ovld __cnfn powr(double3 x, double3 y); - double4 __ovld __cnfn powr(double4 x, double4 y); - double8 __ovld __cnfn powr(double8 x, double8 y); - double16 __ovld __cnfn powr(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn powr(half x, half y); - half2 __ovld __cnfn powr(half2 x, half2 y); -@@ -8186,14 +8195,14 @@ float3 __ovld __cnfn remainder(float3 x, float3 y); - float4 __ovld __cnfn remainder(float4 x, float4 y); - float8 __ovld __cnfn remainder(float8 x, float8 y); - float16 __ovld __cnfn remainder(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn remainder(double x, double y); - double2 __ovld __cnfn remainder(double2 x, double2 y); - double3 __ovld __cnfn remainder(double3 x, double3 y); - double4 __ovld __cnfn remainder(double4 x, double4 y); - double8 __ovld __cnfn remainder(double8 x, double8 y); - double16 __ovld __cnfn remainder(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn remainder(half x, half y); - half2 __ovld __cnfn remainder(half2 x, half2 y); -@@ -8215,21 +8224,21 @@ half16 __ovld __cnfn remainder(half16 x, half16 y); - * sign as x/y. It stores this signed value in the object - * pointed to by quo. - */ --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - float __ovld remquo(float x, float y, int *quo); - float2 __ovld remquo(float2 x, float2 y, int2 *quo); - float3 __ovld remquo(float3 x, float3 y, int3 *quo); - float4 __ovld remquo(float4 x, float4 y, int4 *quo); - float8 __ovld remquo(float8 x, float8 y, int8 *quo); - float16 __ovld remquo(float16 x, float16 y, int16 *quo); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld remquo(double x, double y, int *quo); - double2 __ovld remquo(double2 x, double2 y, int2 *quo); - double3 __ovld remquo(double3 x, double3 y, int3 *quo); - double4 __ovld remquo(double4 x, double4 y, int4 *quo); - double8 __ovld remquo(double8 x, double8 y, int8 *quo); - double16 __ovld remquo(double16 x, double16 y, int16 *quo); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld remquo(half x, half y, int *quo); - half2 __ovld remquo(half2 x, half2 y, int2 *quo); -@@ -8237,9 +8246,10 @@ half3 __ovld remquo(half3 x, half3 y, int3 *quo); - half4 __ovld remquo(half4 x, half4 y, int4 *quo); - half8 __ovld remquo(half8 x, half8 y, int8 *quo); - half16 __ovld remquo(half16 x, half16 y, int16 *quo); -- - #endif //cl_khr_fp16 --#else -+#endif //__opencl_c_generic_address_space -+ -+#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) - float __ovld remquo(float x, float y, __global int *quo); - float2 __ovld remquo(float2 x, float2 y, __global int2 *quo); - float3 __ovld remquo(float3 x, float3 y, __global int3 *quo); -@@ -8258,7 +8268,7 @@ float3 __ovld remquo(float3 x, float3 y, __private int3 *quo); - float4 __ovld remquo(float4 x, float4 y, __private int4 *quo); - float8 __ovld remquo(float8 x, float8 y, __private int8 *quo); - float16 __ovld remquo(float16 x, float16 y, __private int16 *quo); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld remquo(double x, double y, __global int *quo); - double2 __ovld remquo(double2 x, double2 y, __global int2 *quo); - double3 __ovld remquo(double3 x, double3 y, __global int3 *quo); -@@ -8277,7 +8287,7 @@ double3 __ovld remquo(double3 x, double3 y, __private int3 *quo); - double4 __ovld remquo(double4 x, double4 y, __private int4 *quo); - double8 __ovld remquo(double8 x, double8 y, __private int8 *quo); - double16 __ovld remquo(double16 x, double16 y, __private int16 *quo); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld remquo(half x, half y, __global int *quo); - half2 __ovld remquo(half2 x, half2 y, __global int2 *quo); -@@ -8298,7 +8308,8 @@ half4 __ovld remquo(half4 x, half4 y, __private int4 *quo); - half8 __ovld remquo(half8 x, half8 y, __private int8 *quo); - half16 __ovld remquo(half16 x, half16 y, __private int16 *quo); - #endif //cl_khr_fp16 --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != -+ //! CL_VERSION_2_0) - /** - * Round to integral value (using round to nearest - * even rounding mode) in floating-point format. -@@ -8311,14 +8322,14 @@ float3 __ovld __cnfn rint(float3); - float4 __ovld __cnfn rint(float4); - float8 __ovld __cnfn rint(float8); - float16 __ovld __cnfn rint(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn rint(double); - double2 __ovld __cnfn rint(double2); - double3 __ovld __cnfn rint(double3); - double4 __ovld __cnfn rint(double4); - double8 __ovld __cnfn rint(double8); - double16 __ovld __cnfn rint(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn rint(half); - half2 __ovld __cnfn rint(half2); -@@ -8337,14 +8348,14 @@ float3 __ovld __cnfn rootn(float3 x, int3 y); - float4 __ovld __cnfn rootn(float4 x, int4 y); - float8 __ovld __cnfn rootn(float8 x, int8 y); - float16 __ovld __cnfn rootn(float16 x, int16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn rootn(double x, int y); - double2 __ovld __cnfn rootn(double2 x, int2 y); - double3 __ovld __cnfn rootn(double3 x, int3 y); - double4 __ovld __cnfn rootn(double4 x, int4 y); - double8 __ovld __cnfn rootn(double8 x, int8 y); - double16 __ovld __cnfn rootn(double16 x, int16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn rootn(half x, int y); - half2 __ovld __cnfn rootn(half2 x, int2 y); -@@ -8365,14 +8376,14 @@ float3 __ovld __cnfn round(float3 x); - float4 __ovld __cnfn round(float4 x); - float8 __ovld __cnfn round(float8 x); - float16 __ovld __cnfn round(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn round(double x); - double2 __ovld __cnfn round(double2 x); - double3 __ovld __cnfn round(double3 x); - double4 __ovld __cnfn round(double4 x); - double8 __ovld __cnfn round(double8 x); - double16 __ovld __cnfn round(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn round(half x); - half2 __ovld __cnfn round(half2 x); -@@ -8391,14 +8402,14 @@ float3 __ovld __cnfn rsqrt(float3); - float4 __ovld __cnfn rsqrt(float4); - float8 __ovld __cnfn rsqrt(float8); - float16 __ovld __cnfn rsqrt(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn rsqrt(double); - double2 __ovld __cnfn rsqrt(double2); - double3 __ovld __cnfn rsqrt(double3); - double4 __ovld __cnfn rsqrt(double4); - double8 __ovld __cnfn rsqrt(double8); - double16 __ovld __cnfn rsqrt(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn rsqrt(half); - half2 __ovld __cnfn rsqrt(half2); -@@ -8417,14 +8428,14 @@ float3 __ovld __cnfn sin(float3); - float4 __ovld __cnfn sin(float4); - float8 __ovld __cnfn sin(float8); - float16 __ovld __cnfn sin(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn sin(double); - double2 __ovld __cnfn sin(double2); - double3 __ovld __cnfn sin(double3); - double4 __ovld __cnfn sin(double4); - double8 __ovld __cnfn sin(double8); - double16 __ovld __cnfn sin(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn sin(half); - half2 __ovld __cnfn sin(half2); -@@ -8439,21 +8450,21 @@ half16 __ovld __cnfn sin(half16); - * is the return value and computed cosine is returned - * in cosval. - */ --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - float __ovld sincos(float x, float *cosval); - float2 __ovld sincos(float2 x, float2 *cosval); - float3 __ovld sincos(float3 x, float3 *cosval); - float4 __ovld sincos(float4 x, float4 *cosval); - float8 __ovld sincos(float8 x, float8 *cosval); - float16 __ovld sincos(float16 x, float16 *cosval); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld sincos(double x, double *cosval); - double2 __ovld sincos(double2 x, double2 *cosval); - double3 __ovld sincos(double3 x, double3 *cosval); - double4 __ovld sincos(double4 x, double4 *cosval); - double8 __ovld sincos(double8 x, double8 *cosval); - double16 __ovld sincos(double16 x, double16 *cosval); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld sincos(half x, half *cosval); - half2 __ovld sincos(half2 x, half2 *cosval); -@@ -8462,7 +8473,9 @@ half4 __ovld sincos(half4 x, half4 *cosval); - half8 __ovld sincos(half8 x, half8 *cosval); - half16 __ovld sincos(half16 x, half16 *cosval); - #endif //cl_khr_fp16 --#else -+#endif //__opencl_c_generic_address_space -+ -+#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) - float __ovld sincos(float x, __global float *cosval); - float2 __ovld sincos(float2 x, __global float2 *cosval); - float3 __ovld sincos(float3 x, __global float3 *cosval); -@@ -8481,7 +8494,7 @@ float3 __ovld sincos(float3 x, __private float3 *cosval); - float4 __ovld sincos(float4 x, __private float4 *cosval); - float8 __ovld sincos(float8 x, __private float8 *cosval); - float16 __ovld sincos(float16 x, __private float16 *cosval); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld sincos(double x, __global double *cosval); - double2 __ovld sincos(double2 x, __global double2 *cosval); - double3 __ovld sincos(double3 x, __global double3 *cosval); -@@ -8500,7 +8513,7 @@ double3 __ovld sincos(double3 x, __private double3 *cosval); - double4 __ovld sincos(double4 x, __private double4 *cosval); - double8 __ovld sincos(double8 x, __private double8 *cosval); - double16 __ovld sincos(double16 x, __private double16 *cosval); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld sincos(half x, __global half *cosval); - half2 __ovld sincos(half2 x, __global half2 *cosval); -@@ -8521,8 +8534,8 @@ half4 __ovld sincos(half4 x, __private half4 *cosval); - half8 __ovld sincos(half8 x, __private half8 *cosval); - half16 __ovld sincos(half16 x, __private half16 *cosval); - #endif //cl_khr_fp16 --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -- -+#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != -+ //! CL_VERSION_2_0) - /** - * Compute hyperbolic sine. - */ -@@ -8532,14 +8545,14 @@ float3 __ovld __cnfn sinh(float3); - float4 __ovld __cnfn sinh(float4); - float8 __ovld __cnfn sinh(float8); - float16 __ovld __cnfn sinh(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn sinh(double); - double2 __ovld __cnfn sinh(double2); - double3 __ovld __cnfn sinh(double3); - double4 __ovld __cnfn sinh(double4); - double8 __ovld __cnfn sinh(double8); - double16 __ovld __cnfn sinh(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn sinh(half); - half2 __ovld __cnfn sinh(half2); -@@ -8558,14 +8571,14 @@ float3 __ovld __cnfn sinpi(float3 x); - float4 __ovld __cnfn sinpi(float4 x); - float8 __ovld __cnfn sinpi(float8 x); - float16 __ovld __cnfn sinpi(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn sinpi(double x); - double2 __ovld __cnfn sinpi(double2 x); - double3 __ovld __cnfn sinpi(double3 x); - double4 __ovld __cnfn sinpi(double4 x); - double8 __ovld __cnfn sinpi(double8 x); - double16 __ovld __cnfn sinpi(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn sinpi(half x); - half2 __ovld __cnfn sinpi(half2 x); -@@ -8584,14 +8597,14 @@ float3 __ovld __cnfn sqrt(float3); - float4 __ovld __cnfn sqrt(float4); - float8 __ovld __cnfn sqrt(float8); - float16 __ovld __cnfn sqrt(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn sqrt(double); - double2 __ovld __cnfn sqrt(double2); - double3 __ovld __cnfn sqrt(double3); - double4 __ovld __cnfn sqrt(double4); - double8 __ovld __cnfn sqrt(double8); - double16 __ovld __cnfn sqrt(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn sqrt(half); - half2 __ovld __cnfn sqrt(half2); -@@ -8610,14 +8623,14 @@ float3 __ovld __cnfn tan(float3); - float4 __ovld __cnfn tan(float4); - float8 __ovld __cnfn tan(float8); - float16 __ovld __cnfn tan(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn tan(double); - double2 __ovld __cnfn tan(double2); - double3 __ovld __cnfn tan(double3); - double4 __ovld __cnfn tan(double4); - double8 __ovld __cnfn tan(double8); - double16 __ovld __cnfn tan(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn tan(half); - half2 __ovld __cnfn tan(half2); -@@ -8636,14 +8649,14 @@ float3 __ovld __cnfn tanh(float3); - float4 __ovld __cnfn tanh(float4); - float8 __ovld __cnfn tanh(float8); - float16 __ovld __cnfn tanh(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn tanh(double); - double2 __ovld __cnfn tanh(double2); - double3 __ovld __cnfn tanh(double3); - double4 __ovld __cnfn tanh(double4); - double8 __ovld __cnfn tanh(double8); - double16 __ovld __cnfn tanh(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn tanh(half); - half2 __ovld __cnfn tanh(half2); -@@ -8662,14 +8675,14 @@ float3 __ovld __cnfn tanpi(float3 x); - float4 __ovld __cnfn tanpi(float4 x); - float8 __ovld __cnfn tanpi(float8 x); - float16 __ovld __cnfn tanpi(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn tanpi(double x); - double2 __ovld __cnfn tanpi(double2 x); - double3 __ovld __cnfn tanpi(double3 x); - double4 __ovld __cnfn tanpi(double4 x); - double8 __ovld __cnfn tanpi(double8 x); - double16 __ovld __cnfn tanpi(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn tanpi(half x); - half2 __ovld __cnfn tanpi(half2 x); -@@ -8688,14 +8701,14 @@ float3 __ovld __cnfn tgamma(float3); - float4 __ovld __cnfn tgamma(float4); - float8 __ovld __cnfn tgamma(float8); - float16 __ovld __cnfn tgamma(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn tgamma(double); - double2 __ovld __cnfn tgamma(double2); - double3 __ovld __cnfn tgamma(double3); - double4 __ovld __cnfn tgamma(double4); - double8 __ovld __cnfn tgamma(double8); - double16 __ovld __cnfn tgamma(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn tgamma(half); - half2 __ovld __cnfn tgamma(half2); -@@ -8715,14 +8728,14 @@ float3 __ovld __cnfn trunc(float3); - float4 __ovld __cnfn trunc(float4); - float8 __ovld __cnfn trunc(float8); - float16 __ovld __cnfn trunc(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn trunc(double); - double2 __ovld __cnfn trunc(double2); - double3 __ovld __cnfn trunc(double3); - double4 __ovld __cnfn trunc(double4); - double8 __ovld __cnfn trunc(double8); - double16 __ovld __cnfn trunc(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn trunc(half); - half2 __ovld __cnfn trunc(half2); -@@ -10108,7 +10121,7 @@ float3 __ovld __cnfn clamp(float3 x, float minval, float maxval); - float4 __ovld __cnfn clamp(float4 x, float minval, float maxval); - float8 __ovld __cnfn clamp(float8 x, float minval, float maxval); - float16 __ovld __cnfn clamp(float16 x, float minval, float maxval); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn clamp(double x, double minval, double maxval); - double2 __ovld __cnfn clamp(double2 x, double2 minval, double2 maxval); - double3 __ovld __cnfn clamp(double3 x, double3 minval, double3 maxval); -@@ -10120,7 +10133,7 @@ double3 __ovld __cnfn clamp(double3 x, double minval, double maxval); - double4 __ovld __cnfn clamp(double4 x, double minval, double maxval); - double8 __ovld __cnfn clamp(double8 x, double minval, double maxval); - double16 __ovld __cnfn clamp(double16 x, double minval, double maxval); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn clamp(half x, half minval, half maxval); - half2 __ovld __cnfn clamp(half2 x, half2 minval, half2 maxval); -@@ -10145,14 +10158,14 @@ float3 __ovld __cnfn degrees(float3 radians); - float4 __ovld __cnfn degrees(float4 radians); - float8 __ovld __cnfn degrees(float8 radians); - float16 __ovld __cnfn degrees(float16 radians); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn degrees(double radians); - double2 __ovld __cnfn degrees(double2 radians); - double3 __ovld __cnfn degrees(double3 radians); - double4 __ovld __cnfn degrees(double4 radians); - double8 __ovld __cnfn degrees(double8 radians); - double16 __ovld __cnfn degrees(double16 radians); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn degrees(half radians); - half2 __ovld __cnfn degrees(half2 radians); -@@ -10177,7 +10190,7 @@ float3 __ovld __cnfn max(float3 x, float y); - float4 __ovld __cnfn max(float4 x, float y); - float8 __ovld __cnfn max(float8 x, float y); - float16 __ovld __cnfn max(float16 x, float y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn max(double x, double y); - double2 __ovld __cnfn max(double2 x, double2 y); - double3 __ovld __cnfn max(double3 x, double3 y); -@@ -10189,7 +10202,7 @@ double3 __ovld __cnfn max(double3 x, double y); - double4 __ovld __cnfn max(double4 x, double y); - double8 __ovld __cnfn max(double8 x, double y); - double16 __ovld __cnfn max(double16 x, double y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn max(half x, half y); - half2 __ovld __cnfn max(half2 x, half2 y); -@@ -10219,7 +10232,7 @@ float3 __ovld __cnfn min(float3 x, float y); - float4 __ovld __cnfn min(float4 x, float y); - float8 __ovld __cnfn min(float8 x, float y); - float16 __ovld __cnfn min(float16 x, float y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn min(double x, double y); - double2 __ovld __cnfn min(double2 x, double2 y); - double3 __ovld __cnfn min(double3 x, double3 y); -@@ -10231,7 +10244,7 @@ double3 __ovld __cnfn min(double3 x, double y); - double4 __ovld __cnfn min(double4 x, double y); - double8 __ovld __cnfn min(double8 x, double y); - double16 __ovld __cnfn min(double16 x, double y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn min(half x, half y); - half2 __ovld __cnfn min(half2 x, half2 y); -@@ -10264,7 +10277,7 @@ float3 __ovld __cnfn mix(float3 x, float3 y, float a); - float4 __ovld __cnfn mix(float4 x, float4 y, float a); - float8 __ovld __cnfn mix(float8 x, float8 y, float a); - float16 __ovld __cnfn mix(float16 x, float16 y, float a); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn mix(double x, double y, double a); - double2 __ovld __cnfn mix(double2 x, double2 y, double2 a); - double3 __ovld __cnfn mix(double3 x, double3 y, double3 a); -@@ -10276,7 +10289,7 @@ double3 __ovld __cnfn mix(double3 x, double3 y, double a); - double4 __ovld __cnfn mix(double4 x, double4 y, double a); - double8 __ovld __cnfn mix(double8 x, double8 y, double a); - double16 __ovld __cnfn mix(double16 x, double16 y, double a); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn mix(half x, half y, half a); - half2 __ovld __cnfn mix(half2 x, half2 y, half2 a); -@@ -10301,14 +10314,14 @@ float3 __ovld __cnfn radians(float3 degrees); - float4 __ovld __cnfn radians(float4 degrees); - float8 __ovld __cnfn radians(float8 degrees); - float16 __ovld __cnfn radians(float16 degrees); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn radians(double degrees); - double2 __ovld __cnfn radians(double2 degrees); - double3 __ovld __cnfn radians(double3 degrees); - double4 __ovld __cnfn radians(double4 degrees); - double8 __ovld __cnfn radians(double8 degrees); - double16 __ovld __cnfn radians(double16 degrees); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn radians(half degrees); - half2 __ovld __cnfn radians(half2 degrees); -@@ -10332,7 +10345,7 @@ float3 __ovld __cnfn step(float edge, float3 x); - float4 __ovld __cnfn step(float edge, float4 x); - float8 __ovld __cnfn step(float edge, float8 x); - float16 __ovld __cnfn step(float edge, float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn step(double edge, double x); - double2 __ovld __cnfn step(double2 edge, double2 x); - double3 __ovld __cnfn step(double3 edge, double3 x); -@@ -10344,7 +10357,7 @@ double3 __ovld __cnfn step(double edge, double3 x); - double4 __ovld __cnfn step(double edge, double4 x); - double8 __ovld __cnfn step(double edge, double8 x); - double16 __ovld __cnfn step(double edge, double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn step(half edge, half x); - half2 __ovld __cnfn step(half2 edge, half2 x); -@@ -10383,7 +10396,7 @@ float3 __ovld __cnfn smoothstep(float edge0, float edge1, float3 x); - float4 __ovld __cnfn smoothstep(float edge0, float edge1, float4 x); - float8 __ovld __cnfn smoothstep(float edge0, float edge1, float8 x); - float16 __ovld __cnfn smoothstep(float edge0, float edge1, float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn smoothstep(double edge0, double edge1, double x); - double2 __ovld __cnfn smoothstep(double2 edge0, double2 edge1, double2 x); - double3 __ovld __cnfn smoothstep(double3 edge0, double3 edge1, double3 x); -@@ -10395,7 +10408,7 @@ double3 __ovld __cnfn smoothstep(double edge0, double edge1, double3 x); - double4 __ovld __cnfn smoothstep(double edge0, double edge1, double4 x); - double8 __ovld __cnfn smoothstep(double edge0, double edge1, double8 x); - double16 __ovld __cnfn smoothstep(double edge0, double edge1, double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn smoothstep(half edge0, half edge1, half x); - half2 __ovld __cnfn smoothstep(half2 edge0, half2 edge1, half2 x); -@@ -10420,14 +10433,14 @@ float3 __ovld __cnfn sign(float3 x); - float4 __ovld __cnfn sign(float4 x); - float8 __ovld __cnfn sign(float8 x); - float16 __ovld __cnfn sign(float16 x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn sign(double x); - double2 __ovld __cnfn sign(double2 x); - double3 __ovld __cnfn sign(double3 x); - double4 __ovld __cnfn sign(double4 x); - double8 __ovld __cnfn sign(double8 x); - double16 __ovld __cnfn sign(double16 x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn sign(half x); - half2 __ovld __cnfn sign(half2 x); -@@ -10445,10 +10458,10 @@ half16 __ovld __cnfn sign(half16 x); - */ - float4 __ovld __cnfn cross(float4 p0, float4 p1); - float3 __ovld __cnfn cross(float3 p0, float3 p1); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double4 __ovld __cnfn cross(double4 p0, double4 p1); - double3 __ovld __cnfn cross(double3 p0, double3 p1); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half4 __ovld __cnfn cross(half4 p0, half4 p1); - half3 __ovld __cnfn cross(half3 p0, half3 p1); -@@ -10461,12 +10474,12 @@ float __ovld __cnfn dot(float p0, float p1); - float __ovld __cnfn dot(float2 p0, float2 p1); - float __ovld __cnfn dot(float3 p0, float3 p1); - float __ovld __cnfn dot(float4 p0, float4 p1); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn dot(double p0, double p1); - double __ovld __cnfn dot(double2 p0, double2 p1); - double __ovld __cnfn dot(double3 p0, double3 p1); - double __ovld __cnfn dot(double4 p0, double4 p1); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn dot(half p0, half p1); - half __ovld __cnfn dot(half2 p0, half2 p1); -@@ -10482,12 +10495,12 @@ float __ovld __cnfn distance(float p0, float p1); - float __ovld __cnfn distance(float2 p0, float2 p1); - float __ovld __cnfn distance(float3 p0, float3 p1); - float __ovld __cnfn distance(float4 p0, float4 p1); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn distance(double p0, double p1); - double __ovld __cnfn distance(double2 p0, double2 p1); - double __ovld __cnfn distance(double3 p0, double3 p1); - double __ovld __cnfn distance(double4 p0, double4 p1); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn distance(half p0, half p1); - half __ovld __cnfn distance(half2 p0, half2 p1); -@@ -10503,12 +10516,12 @@ float __ovld __cnfn length(float p); - float __ovld __cnfn length(float2 p); - float __ovld __cnfn length(float3 p); - float __ovld __cnfn length(float4 p); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn length(double p); - double __ovld __cnfn length(double2 p); - double __ovld __cnfn length(double3 p); - double __ovld __cnfn length(double4 p); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn length(half p); - half __ovld __cnfn length(half2 p); -@@ -10524,12 +10537,12 @@ float __ovld __cnfn normalize(float p); - float2 __ovld __cnfn normalize(float2 p); - float3 __ovld __cnfn normalize(float3 p); - float4 __ovld __cnfn normalize(float4 p); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn normalize(double p); - double2 __ovld __cnfn normalize(double2 p); - double3 __ovld __cnfn normalize(double3 p); - double4 __ovld __cnfn normalize(double4 p); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn normalize(half p); - half2 __ovld __cnfn normalize(half2 p); -@@ -10610,14 +10623,14 @@ int3 __ovld __cnfn isequal(float3 x, float3 y); - int4 __ovld __cnfn isequal(float4 x, float4 y); - int8 __ovld __cnfn isequal(float8 x, float8 y); - int16 __ovld __cnfn isequal(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn isequal(double x, double y); - long2 __ovld __cnfn isequal(double2 x, double2 y); - long3 __ovld __cnfn isequal(double3 x, double3 y); - long4 __ovld __cnfn isequal(double4 x, double4 y); - long8 __ovld __cnfn isequal(double8 x, double8 y); - long16 __ovld __cnfn isequal(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn isequal(half x, half y); - short2 __ovld __cnfn isequal(half2 x, half2 y); -@@ -10636,14 +10649,14 @@ int3 __ovld __cnfn isnotequal(float3 x, float3 y); - int4 __ovld __cnfn isnotequal(float4 x, float4 y); - int8 __ovld __cnfn isnotequal(float8 x, float8 y); - int16 __ovld __cnfn isnotequal(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn isnotequal(double x, double y); - long2 __ovld __cnfn isnotequal(double2 x, double2 y); - long3 __ovld __cnfn isnotequal(double3 x, double3 y); - long4 __ovld __cnfn isnotequal(double4 x, double4 y); - long8 __ovld __cnfn isnotequal(double8 x, double8 y); - long16 __ovld __cnfn isnotequal(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn isnotequal(half x, half y); - short2 __ovld __cnfn isnotequal(half2 x, half2 y); -@@ -10662,14 +10675,14 @@ int3 __ovld __cnfn isgreater(float3 x, float3 y); - int4 __ovld __cnfn isgreater(float4 x, float4 y); - int8 __ovld __cnfn isgreater(float8 x, float8 y); - int16 __ovld __cnfn isgreater(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn isgreater(double x, double y); - long2 __ovld __cnfn isgreater(double2 x, double2 y); - long3 __ovld __cnfn isgreater(double3 x, double3 y); - long4 __ovld __cnfn isgreater(double4 x, double4 y); - long8 __ovld __cnfn isgreater(double8 x, double8 y); - long16 __ovld __cnfn isgreater(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn isgreater(half x, half y); - short2 __ovld __cnfn isgreater(half2 x, half2 y); -@@ -10688,14 +10701,14 @@ int3 __ovld __cnfn isgreaterequal(float3 x, float3 y); - int4 __ovld __cnfn isgreaterequal(float4 x, float4 y); - int8 __ovld __cnfn isgreaterequal(float8 x, float8 y); - int16 __ovld __cnfn isgreaterequal(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn isgreaterequal(double x, double y); - long2 __ovld __cnfn isgreaterequal(double2 x, double2 y); - long3 __ovld __cnfn isgreaterequal(double3 x, double3 y); - long4 __ovld __cnfn isgreaterequal(double4 x, double4 y); - long8 __ovld __cnfn isgreaterequal(double8 x, double8 y); - long16 __ovld __cnfn isgreaterequal(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn isgreaterequal(half x, half y); - short2 __ovld __cnfn isgreaterequal(half2 x, half2 y); -@@ -10714,14 +10727,14 @@ int3 __ovld __cnfn isless(float3 x, float3 y); - int4 __ovld __cnfn isless(float4 x, float4 y); - int8 __ovld __cnfn isless(float8 x, float8 y); - int16 __ovld __cnfn isless(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn isless(double x, double y); - long2 __ovld __cnfn isless(double2 x, double2 y); - long3 __ovld __cnfn isless(double3 x, double3 y); - long4 __ovld __cnfn isless(double4 x, double4 y); - long8 __ovld __cnfn isless(double8 x, double8 y); - long16 __ovld __cnfn isless(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn isless(half x, half y); - short2 __ovld __cnfn isless(half2 x, half2 y); -@@ -10740,14 +10753,14 @@ int3 __ovld __cnfn islessequal(float3 x, float3 y); - int4 __ovld __cnfn islessequal(float4 x, float4 y); - int8 __ovld __cnfn islessequal(float8 x, float8 y); - int16 __ovld __cnfn islessequal(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn islessequal(double x, double y); - long2 __ovld __cnfn islessequal(double2 x, double2 y); - long3 __ovld __cnfn islessequal(double3 x, double3 y); - long4 __ovld __cnfn islessequal(double4 x, double4 y); - long8 __ovld __cnfn islessequal(double8 x, double8 y); - long16 __ovld __cnfn islessequal(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn islessequal(half x, half y); - short2 __ovld __cnfn islessequal(half2 x, half2 y); -@@ -10767,14 +10780,14 @@ int3 __ovld __cnfn islessgreater(float3 x, float3 y); - int4 __ovld __cnfn islessgreater(float4 x, float4 y); - int8 __ovld __cnfn islessgreater(float8 x, float8 y); - int16 __ovld __cnfn islessgreater(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn islessgreater(double x, double y); - long2 __ovld __cnfn islessgreater(double2 x, double2 y); - long3 __ovld __cnfn islessgreater(double3 x, double3 y); - long4 __ovld __cnfn islessgreater(double4 x, double4 y); - long8 __ovld __cnfn islessgreater(double8 x, double8 y); - long16 __ovld __cnfn islessgreater(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn islessgreater(half x, half y); - short2 __ovld __cnfn islessgreater(half2 x, half2 y); -@@ -10793,14 +10806,14 @@ int3 __ovld __cnfn isfinite(float3); - int4 __ovld __cnfn isfinite(float4); - int8 __ovld __cnfn isfinite(float8); - int16 __ovld __cnfn isfinite(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn isfinite(double); - long2 __ovld __cnfn isfinite(double2); - long3 __ovld __cnfn isfinite(double3); - long4 __ovld __cnfn isfinite(double4); - long8 __ovld __cnfn isfinite(double8); - long16 __ovld __cnfn isfinite(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn isfinite(half); - short2 __ovld __cnfn isfinite(half2); -@@ -10819,14 +10832,14 @@ int3 __ovld __cnfn isinf(float3); - int4 __ovld __cnfn isinf(float4); - int8 __ovld __cnfn isinf(float8); - int16 __ovld __cnfn isinf(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn isinf(double); - long2 __ovld __cnfn isinf(double2); - long3 __ovld __cnfn isinf(double3); - long4 __ovld __cnfn isinf(double4); - long8 __ovld __cnfn isinf(double8); - long16 __ovld __cnfn isinf(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn isinf(half); - short2 __ovld __cnfn isinf(half2); -@@ -10845,14 +10858,14 @@ int3 __ovld __cnfn isnan(float3); - int4 __ovld __cnfn isnan(float4); - int8 __ovld __cnfn isnan(float8); - int16 __ovld __cnfn isnan(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn isnan(double); - long2 __ovld __cnfn isnan(double2); - long3 __ovld __cnfn isnan(double3); - long4 __ovld __cnfn isnan(double4); - long8 __ovld __cnfn isnan(double8); - long16 __ovld __cnfn isnan(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn isnan(half); - short2 __ovld __cnfn isnan(half2); -@@ -10871,14 +10884,14 @@ int3 __ovld __cnfn isnormal(float3); - int4 __ovld __cnfn isnormal(float4); - int8 __ovld __cnfn isnormal(float8); - int16 __ovld __cnfn isnormal(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn isnormal(double); - long2 __ovld __cnfn isnormal(double2); - long3 __ovld __cnfn isnormal(double3); - long4 __ovld __cnfn isnormal(double4); - long8 __ovld __cnfn isnormal(double8); - long16 __ovld __cnfn isnormal(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn isnormal(half); - short2 __ovld __cnfn isnormal(half2); -@@ -10899,14 +10912,14 @@ int3 __ovld __cnfn isordered(float3 x, float3 y); - int4 __ovld __cnfn isordered(float4 x, float4 y); - int8 __ovld __cnfn isordered(float8 x, float8 y); - int16 __ovld __cnfn isordered(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn isordered(double x, double y); - long2 __ovld __cnfn isordered(double2 x, double2 y); - long3 __ovld __cnfn isordered(double3 x, double3 y); - long4 __ovld __cnfn isordered(double4 x, double4 y); - long8 __ovld __cnfn isordered(double8 x, double8 y); - long16 __ovld __cnfn isordered(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn isordered(half x, half y); - short2 __ovld __cnfn isordered(half2 x, half2 y); -@@ -10927,14 +10940,14 @@ int3 __ovld __cnfn isunordered(float3 x, float3 y); - int4 __ovld __cnfn isunordered(float4 x, float4 y); - int8 __ovld __cnfn isunordered(float8 x, float8 y); - int16 __ovld __cnfn isunordered(float16 x, float16 y); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn isunordered(double x, double y); - long2 __ovld __cnfn isunordered(double2 x, double2 y); - long3 __ovld __cnfn isunordered(double3 x, double3 y); - long4 __ovld __cnfn isunordered(double4 x, double4 y); - long8 __ovld __cnfn isunordered(double8 x, double8 y); - long16 __ovld __cnfn isunordered(double16 x, double16 y); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn isunordered(half x, half y); - short2 __ovld __cnfn isunordered(half2 x, half2 y); -@@ -10957,14 +10970,14 @@ int3 __ovld __cnfn signbit(float3); - int4 __ovld __cnfn signbit(float4); - int8 __ovld __cnfn signbit(float8); - int16 __ovld __cnfn signbit(float16); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - int __ovld __cnfn signbit(double); - long2 __ovld __cnfn signbit(double2); - long3 __ovld __cnfn signbit(double3); - long4 __ovld __cnfn signbit(double4); - long8 __ovld __cnfn signbit(double8); - long16 __ovld __cnfn signbit(double16); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - int __ovld __cnfn signbit(half); - short2 __ovld __cnfn signbit(half2); -@@ -11091,14 +11104,14 @@ float3 __ovld __cnfn bitselect(float3 a, float3 b, float3 c); - float4 __ovld __cnfn bitselect(float4 a, float4 b, float4 c); - float8 __ovld __cnfn bitselect(float8 a, float8 b, float8 c); - float16 __ovld __cnfn bitselect(float16 a, float16 b, float16 c); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn bitselect(double a, double b, double c); - double2 __ovld __cnfn bitselect(double2 a, double2 b, double2 c); - double3 __ovld __cnfn bitselect(double3 a, double3 b, double3 c); - double4 __ovld __cnfn bitselect(double4 a, double4 b, double4 c); - double8 __ovld __cnfn bitselect(double8 a, double8 b, double8 c); - double16 __ovld __cnfn bitselect(double16 a, double16 b, double16 c); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn bitselect(half a, half b, half c); - half2 __ovld __cnfn bitselect(half2 a, half2 b, half2 c); -@@ -11231,7 +11244,7 @@ ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c); - long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c); - ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c); - --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __cnfn select(double a, double b, long c); - double2 __ovld __cnfn select(double2 a, double2 b, long2 c); - double3 __ovld __cnfn select(double3 a, double3 b, long3 c); -@@ -11244,7 +11257,7 @@ double3 __ovld __cnfn select(double3 a, double3 b, ulong3 c); - double4 __ovld __cnfn select(double4 a, double4 b, ulong4 c); - double8 __ovld __cnfn select(double8 a, double8 b, ulong8 c); - double16 __ovld __cnfn select(double16 a, double16 b, ulong16 c); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - half __ovld __cnfn select(half a, half b, short c); - half2 __ovld __cnfn select(half2 a, half2 b, short2 c); -@@ -11323,13 +11336,13 @@ uint16 __ovld vload16(size_t offset, const __constant uint *p); - long16 __ovld vload16(size_t offset, const __constant long *p); - ulong16 __ovld vload16(size_t offset, const __constant ulong *p); - float16 __ovld vload16(size_t offset, const __constant float *p); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double2 __ovld vload2(size_t offset, const __constant double *p); - double3 __ovld vload3(size_t offset, const __constant double *p); - double4 __ovld vload4(size_t offset, const __constant double *p); - double8 __ovld vload8(size_t offset, const __constant double *p); - double16 __ovld vload16(size_t offset, const __constant double *p); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - #ifdef cl_khr_fp16 - half __ovld vload(size_t offset, const __constant half *p); -@@ -11340,7 +11353,7 @@ half8 __ovld vload8(size_t offset, const __constant half *p); - half16 __ovld vload16(size_t offset, const __constant half *p); - #endif //cl_khr_fp16 - --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - char2 __ovld vload2(size_t offset, const char *p); - uchar2 __ovld vload2(size_t offset, const uchar *p); - short2 __ovld vload2(size_t offset, const short *p); -@@ -11387,13 +11400,13 @@ long16 __ovld vload16(size_t offset, const long *p); - ulong16 __ovld vload16(size_t offset, const ulong *p); - float16 __ovld vload16(size_t offset, const float *p); - --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double2 __ovld vload2(size_t offset, const double *p); - double3 __ovld vload3(size_t offset, const double *p); - double4 __ovld vload4(size_t offset, const double *p); - double8 __ovld vload8(size_t offset, const double *p); - double16 __ovld vload16(size_t offset, const double *p); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - #ifdef cl_khr_fp16 - half __ovld vload(size_t offset, const half *p); -@@ -11403,7 +11416,7 @@ half4 __ovld vload4(size_t offset, const half *p); - half8 __ovld vload8(size_t offset, const half *p); - half16 __ovld vload16(size_t offset, const half *p); - #endif //cl_khr_fp16 --#else -+#endif //__opencl_c_generic_address_space - char2 __ovld vload2(size_t offset, const __global char *p); - uchar2 __ovld vload2(size_t offset, const __global uchar *p); - short2 __ovld vload2(size_t offset, const __global short *p); -@@ -11540,7 +11553,7 @@ long16 __ovld vload16(size_t offset, const __private long *p); - ulong16 __ovld vload16(size_t offset, const __private ulong *p); - float16 __ovld vload16(size_t offset, const __private float *p); - --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double2 __ovld vload2(size_t offset, const __global double *p); - double3 __ovld vload3(size_t offset, const __global double *p); - double4 __ovld vload4(size_t offset, const __global double *p); -@@ -11556,7 +11569,7 @@ double3 __ovld vload3(size_t offset, const __private double *p); - double4 __ovld vload4(size_t offset, const __private double *p); - double8 __ovld vload8(size_t offset, const __private double *p); - double16 __ovld vload16(size_t offset, const __private double *p); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - #ifdef cl_khr_fp16 - half __ovld vload(size_t offset, const __global half *p); -@@ -11578,9 +11591,8 @@ half4 __ovld vload4(size_t offset, const __private half *p); - half8 __ovld vload8(size_t offset, const __private half *p); - half16 __ovld vload16(size_t offset, const __private half *p); - #endif //cl_khr_fp16 --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - void __ovld vstore2(char2 data, size_t offset, char *p); - void __ovld vstore2(uchar2 data, size_t offset, uchar *p); - void __ovld vstore2(short2 data, size_t offset, short *p); -@@ -11626,13 +11638,13 @@ void __ovld vstore16(uint16 data, size_t offset, uint *p); - void __ovld vstore16(long16 data, size_t offset, long *p); - void __ovld vstore16(ulong16 data, size_t offset, ulong *p); - void __ovld vstore16(float16 data, size_t offset, float *p); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - void __ovld vstore2(double2 data, size_t offset, double *p); - void __ovld vstore3(double3 data, size_t offset, double *p); - void __ovld vstore4(double4 data, size_t offset, double *p); - void __ovld vstore8(double8 data, size_t offset, double *p); - void __ovld vstore16(double16 data, size_t offset, double *p); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - void __ovld vstore(half data, size_t offset, half *p); - void __ovld vstore2(half2 data, size_t offset, half *p); -@@ -11641,7 +11653,7 @@ void __ovld vstore4(half4 data, size_t offset, half *p); - void __ovld vstore8(half8 data, size_t offset, half *p); - void __ovld vstore16(half16 data, size_t offset, half *p); - #endif //cl_khr_fp16 --#else -+#endif //__opencl_c_generic_address_space - void __ovld vstore2(char2 data, size_t offset, __global char *p); - void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p); - void __ovld vstore2(short2 data, size_t offset, __global short *p); -@@ -11777,7 +11789,7 @@ void __ovld vstore16(uint16 data, size_t offset, __private uint *p); - void __ovld vstore16(long16 data, size_t offset, __private long *p); - void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p); - void __ovld vstore16(float16 data, size_t offset, __private float *p); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - void __ovld vstore2(double2 data, size_t offset, __global double *p); - void __ovld vstore3(double3 data, size_t offset, __global double *p); - void __ovld vstore4(double4 data, size_t offset, __global double *p); -@@ -11793,7 +11805,7 @@ void __ovld vstore3(double3 data, size_t offset, __private double *p); - void __ovld vstore4(double4 data, size_t offset, __private double *p); - void __ovld vstore8(double8 data, size_t offset, __private double *p); - void __ovld vstore16(double16 data, size_t offset, __private double *p); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - void __ovld vstore(half data, size_t offset, __global half *p); - void __ovld vstore2(half2 data, size_t offset, __global half *p); -@@ -11814,7 +11826,6 @@ void __ovld vstore4(half4 data, size_t offset, __private half *p); - void __ovld vstore8(half8 data, size_t offset, __private half *p); - void __ovld vstore16(half16 data, size_t offset, __private half *p); - #endif //cl_khr_fp16 --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - /** - * Read sizeof (half) bytes of data from address -@@ -11825,13 +11836,12 @@ void __ovld vstore16(half16 data, size_t offset, __private half *p); - * must be 16-bit aligned. - */ - float __ovld vload_half(size_t offset, const __constant half *p); --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - float __ovld vload_half(size_t offset, const half *p); --#else -+#endif //__opencl_c_generic_address_space - float __ovld vload_half(size_t offset, const __global half *p); - float __ovld vload_half(size_t offset, const __local half *p); - float __ovld vload_half(size_t offset, const __private half *p); --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - /** - * Read sizeof (halfn) bytes of data from address -@@ -11846,13 +11856,13 @@ float3 __ovld vload_half3(size_t offset, const __constant half *p); - float4 __ovld vload_half4(size_t offset, const __constant half *p); - float8 __ovld vload_half8(size_t offset, const __constant half *p); - float16 __ovld vload_half16(size_t offset, const __constant half *p); --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - float2 __ovld vload_half2(size_t offset, const half *p); - float3 __ovld vload_half3(size_t offset, const half *p); - float4 __ovld vload_half4(size_t offset, const half *p); - float8 __ovld vload_half8(size_t offset, const half *p); - float16 __ovld vload_half16(size_t offset, const half *p); --#else -+#endif //__opencl_c_generic_address_space - float2 __ovld vload_half2(size_t offset, const __global half *p); - float3 __ovld vload_half3(size_t offset, const __global half *p); - float4 __ovld vload_half4(size_t offset, const __global half *p); -@@ -11868,7 +11878,6 @@ float3 __ovld vload_half3(size_t offset, const __private half *p); - float4 __ovld vload_half4(size_t offset, const __private half *p); - float8 __ovld vload_half8(size_t offset, const __private half *p); - float16 __ovld vload_half16(size_t offset, const __private half *p); --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - /** - * The float value given by data is first -@@ -11881,20 +11890,20 @@ float16 __ovld vload_half16(size_t offset, const __private half *p); - * The default current rounding mode is round to - * nearest even. - */ --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - void __ovld vstore_half(float data, size_t offset, half *p); - void __ovld vstore_half_rte(float data, size_t offset, half *p); - void __ovld vstore_half_rtz(float data, size_t offset, half *p); - void __ovld vstore_half_rtp(float data, size_t offset, half *p); - void __ovld vstore_half_rtn(float data, size_t offset, half *p); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - void __ovld vstore_half(double data, size_t offset, half *p); - void __ovld vstore_half_rte(double data, size_t offset, half *p); - void __ovld vstore_half_rtz(double data, size_t offset, half *p); - void __ovld vstore_half_rtp(double data, size_t offset, half *p); - void __ovld vstore_half_rtn(double data, size_t offset, half *p); --#endif //cl_khr_fp64 --#else -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#endif //__opencl_c_generic_address_space - void __ovld vstore_half(float data, size_t offset, __global half *p); - void __ovld vstore_half_rte(float data, size_t offset, __global half *p); - void __ovld vstore_half_rtz(float data, size_t offset, __global half *p); -@@ -11910,7 +11919,7 @@ void __ovld vstore_half_rte(float data, size_t offset, __private half *p); - void __ovld vstore_half_rtz(float data, size_t offset, __private half *p); - void __ovld vstore_half_rtp(float data, size_t offset, __private half *p); - void __ovld vstore_half_rtn(float data, size_t offset, __private half *p); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - void __ovld vstore_half(double data, size_t offset, __global half *p); - void __ovld vstore_half_rte(double data, size_t offset, __global half *p); - void __ovld vstore_half_rtz(double data, size_t offset, __global half *p); -@@ -11926,8 +11935,7 @@ void __ovld vstore_half_rte(double data, size_t offset, __private half *p); - void __ovld vstore_half_rtz(double data, size_t offset, __private half *p); - void __ovld vstore_half_rtp(double data, size_t offset, __private half *p); - void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); --#endif //cl_khr_fp64 --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - /** - * The floatn value given by data is converted to -@@ -11940,7 +11948,7 @@ void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); - * The default current rounding mode is round to - * nearest even. - */ --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - void __ovld vstore_half2(float2 data, size_t offset, half *p); - void __ovld vstore_half3(float3 data, size_t offset, half *p); - void __ovld vstore_half4(float4 data, size_t offset, half *p); -@@ -11966,7 +11974,7 @@ void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p); - void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p); - void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p); - void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - void __ovld vstore_half2(double2 data, size_t offset, half *p); - void __ovld vstore_half3(double3 data, size_t offset, half *p); - void __ovld vstore_half4(double4 data, size_t offset, half *p); -@@ -11992,8 +12000,8 @@ void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p); - void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p); - void __ovld vstore_half8_rtn(double8 data, size_t offset, half *p); - void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p); --#endif //cl_khr_fp64 --#else -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#endif //__opencl_c_generic_address_space - void __ovld vstore_half2(float2 data, size_t offset, __global half *p); - void __ovld vstore_half3(float3 data, size_t offset, __global half *p); - void __ovld vstore_half4(float4 data, size_t offset, __global half *p); -@@ -12069,7 +12077,7 @@ void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p); - void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p); - void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p); - void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - void __ovld vstore_half2(double2 data, size_t offset, __global half *p); - void __ovld vstore_half3(double3 data, size_t offset, __global half *p); - void __ovld vstore_half4(double4 data, size_t offset, __global half *p); -@@ -12145,8 +12153,7 @@ void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p); - void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p); - void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p); - void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p); --#endif //cl_khr_fp64 --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - /** - * For n = 1, 2, 4, 8 and 16 read sizeof (halfn) -@@ -12167,14 +12174,14 @@ float3 __ovld vloada_half3(size_t offset, const __constant half *p); - float4 __ovld vloada_half4(size_t offset, const __constant half *p); - float8 __ovld vloada_half8(size_t offset, const __constant half *p); - float16 __ovld vloada_half16(size_t offset, const __constant half *p); --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - float __ovld vloada_half(size_t offset, const half *p); - float2 __ovld vloada_half2(size_t offset, const half *p); - float3 __ovld vloada_half3(size_t offset, const half *p); - float4 __ovld vloada_half4(size_t offset, const half *p); - float8 __ovld vloada_half8(size_t offset, const half *p); - float16 __ovld vloada_half16(size_t offset, const half *p); --#else -+#endif //__opencl_c_generic_address_space - float __ovld vloada_half(size_t offset, const __global half *p); - float2 __ovld vloada_half2(size_t offset, const __global half *p); - float3 __ovld vloada_half3(size_t offset, const __global half *p); -@@ -12193,7 +12200,6 @@ float3 __ovld vloada_half3(size_t offset, const __private half *p); - float4 __ovld vloada_half4(size_t offset, const __private half *p); - float8 __ovld vloada_half8(size_t offset, const __private half *p); - float16 __ovld vloada_half16(size_t offset, const __private half *p); --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - /** - * The floatn value given by data is converted to -@@ -12211,7 +12217,7 @@ float16 __ovld vloada_half16(size_t offset, const __private half *p); - * mode. The default current rounding mode is - * round to nearest even. - */ --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - void __ovld vstorea_half(float data, size_t offset, half *p); - void __ovld vstorea_half2(float2 data, size_t offset, half *p); - void __ovld vstorea_half3(float3 data, size_t offset, half *p); -@@ -12247,7 +12253,7 @@ void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p); - void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p); - void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p); - --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - void __ovld vstorea_half(double data, size_t offset, half *p); - void __ovld vstorea_half2(double2 data, size_t offset, half *p); - void __ovld vstorea_half3(double3 data, size_t offset, half *p); -@@ -12282,9 +12288,9 @@ void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p); - void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p); - void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p); - void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#endif //__opencl_c_generic_address_space - --#else - void __ovld vstorea_half(float data, size_t offset, __global half *p); - void __ovld vstorea_half2(float2 data, size_t offset, __global half *p); - void __ovld vstorea_half3(float3 data, size_t offset, __global half *p); -@@ -12390,7 +12396,7 @@ void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p); - void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p); - void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p); - --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - void __ovld vstorea_half(double data, size_t offset, __global half *p); - void __ovld vstorea_half2(double2 data, size_t offset, __global half *p); - void __ovld vstorea_half3(double3 data, size_t offset, __global half *p); -@@ -12495,8 +12501,7 @@ void __ovld vstorea_half3_rtn(double3 data,size_t offset, __private half *p); - void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p); - void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p); - void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p); --#endif //cl_khr_fp64 --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions - -@@ -12580,7 +12585,7 @@ void __ovld write_mem_fence(cl_mem_fence_flags flags); - - // OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions - --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_generic_address_space - cl_mem_fence_flags __ovld get_fence(const void *ptr); - cl_mem_fence_flags __ovld get_fence(void *ptr); - -@@ -12591,7 +12596,7 @@ cl_mem_fence_flags __ovld get_fence(void *ptr); - * where gentype is builtin type or user defined type. - */ - --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#endif //__opencl_c_generic_address_space - - // OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch - -@@ -12730,7 +12735,7 @@ event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16 - event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event); - event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event); - event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event); - event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event); - event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event); -@@ -12743,7 +12748,7 @@ event_t __ovld async_work_group_copy(__global double3 *dst, const __local double - event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event); - event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event); - event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event); - event_t __ovld async_work_group_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, event_t event); -@@ -12893,7 +12898,7 @@ event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local - event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, size_t dst_stride, event_t event); - event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event); - event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event); - event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event); - event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event); -@@ -12906,7 +12911,7 @@ event_t __ovld async_work_group_strided_copy(__global double3 *dst, const __loca - event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event); - event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event); - event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event); - event_t __ovld async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event); -@@ -12996,14 +13001,14 @@ void __ovld prefetch(const __global uint16 *p, size_t num_elements); - void __ovld prefetch(const __global long16 *p, size_t num_elements); - void __ovld prefetch(const __global ulong16 *p, size_t num_elements); - void __ovld prefetch(const __global float16 *p, size_t num_elements); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - void __ovld prefetch(const __global double *p, size_t num_elements); - void __ovld prefetch(const __global double2 *p, size_t num_elements); - void __ovld prefetch(const __global double3 *p, size_t num_elements); - void __ovld prefetch(const __global double4 *p, size_t num_elements); - void __ovld prefetch(const __global double8 *p, size_t num_elements); - void __ovld prefetch(const __global double16 *p, size_t num_elements); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #ifdef cl_khr_fp16 - void __ovld prefetch(const __global half *p, size_t num_elements); - void __ovld prefetch(const __global half2 *p, size_t num_elements); -@@ -13026,9 +13031,11 @@ void __ovld prefetch(const __global half16 *p, size_t num_elements); - * pointed by p. The function returns old. - */ - int __ovld atomic_add(volatile __global int *p, int val); --unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_add(volatile __global unsigned int *p, -+ unsigned int val); - int __ovld atomic_add(volatile __local int *p, int val); --unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_add(volatile __local unsigned int *p, -+ unsigned int val); - #ifdef __OPENCL_CPP_VERSION__ - int __ovld atomic_add(volatile int *p, int val); - unsigned int __ovld atomic_add(volatile unsigned int *p, unsigned int val); -@@ -13056,9 +13063,11 @@ unsigned long __ovld atom_add(volatile __local unsigned long *p, unsigned long v - * returns old. - */ - int __ovld atomic_sub(volatile __global int *p, int val); --unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_sub(volatile __global unsigned int *p, -+ unsigned int val); - int __ovld atomic_sub(volatile __local int *p, int val); --unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_sub(volatile __local unsigned int *p, -+ unsigned int val); - #ifdef __OPENCL_CPP_VERSION__ - int __ovld atomic_sub(volatile int *p, int val); - unsigned int __ovld atomic_sub(volatile unsigned int *p, unsigned int val); -@@ -13086,9 +13095,11 @@ unsigned long __ovld atom_sub(volatile __local unsigned long *p, unsigned long v - * value. - */ - int __ovld atomic_xchg(volatile __global int *p, int val); --unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, -+ unsigned int val); - int __ovld atomic_xchg(volatile __local int *p, int val); --unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, -+ unsigned int val); - float __ovld atomic_xchg(volatile __global float *p, float val); - float __ovld atomic_xchg(volatile __local float *p, float val); - #ifdef __OPENCL_CPP_VERSION__ -@@ -13183,12 +13194,15 @@ unsigned long __ovld atom_dec(volatile __local unsigned long *p); - * returns old. - */ - int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val); --unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val); -+unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, -+ unsigned int cmp, unsigned int val); - int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val); --unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val); -+unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, -+ unsigned int cmp, unsigned int val); - #ifdef __OPENCL_CPP_VERSION__ - int __ovld atomic_cmpxchg(volatile int *p, int cmp, int val); --unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp, unsigned int val); -+unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp, -+ unsigned int val); - #endif - - #if defined(cl_khr_global_int32_base_atomics) -@@ -13215,9 +13229,11 @@ unsigned long __ovld atom_cmpxchg(volatile __local unsigned long *p, unsigned lo - * returns old. - */ - int __ovld atomic_min(volatile __global int *p, int val); --unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_min(volatile __global unsigned int *p, -+ unsigned int val); - int __ovld atomic_min(volatile __local int *p, int val); --unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_min(volatile __local unsigned int *p, -+ unsigned int val); - #ifdef __OPENCL_CPP_VERSION__ - int __ovld atomic_min(volatile int *p, int val); - unsigned int __ovld atomic_min(volatile unsigned int *p, unsigned int val); -@@ -13247,9 +13263,11 @@ unsigned long __ovld atom_min(volatile __local unsigned long *p, unsigned long v - * returns old. - */ - int __ovld atomic_max(volatile __global int *p, int val); --unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_max(volatile __global unsigned int *p, -+ unsigned int val); - int __ovld atomic_max(volatile __local int *p, int val); --unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_max(volatile __local unsigned int *p, -+ unsigned int val); - #ifdef __OPENCL_CPP_VERSION__ - int __ovld atomic_max(volatile int *p, int val); - unsigned int __ovld atomic_max(volatile unsigned int *p, unsigned int val); -@@ -13278,9 +13296,11 @@ unsigned long __ovld atom_max(volatile __local unsigned long *p, unsigned long v - * pointed by p. The function returns old. - */ - int __ovld atomic_and(volatile __global int *p, int val); --unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_and(volatile __global unsigned int *p, -+ unsigned int val); - int __ovld atomic_and(volatile __local int *p, int val); --unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_and(volatile __local unsigned int *p, -+ unsigned int val); - #ifdef __OPENCL_CPP_VERSION__ - int __ovld atomic_and(volatile int *p, int val); - unsigned int __ovld atomic_and(volatile unsigned int *p, unsigned int val); -@@ -13309,9 +13329,11 @@ unsigned long __ovld atom_and(volatile __local unsigned long *p, unsigned long v - * pointed by p. The function returns old. - */ - int __ovld atomic_or(volatile __global int *p, int val); --unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_or(volatile __global unsigned int *p, -+ unsigned int val); - int __ovld atomic_or(volatile __local int *p, int val); --unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_or(volatile __local unsigned int *p, -+ unsigned int val); - #ifdef __OPENCL_CPP_VERSION__ - int __ovld atomic_or(volatile int *p, int val); - unsigned int __ovld atomic_or(volatile unsigned int *p, unsigned int val); -@@ -13340,9 +13362,11 @@ unsigned long __ovld atom_or(volatile __local unsigned long *p, unsigned long va - * pointed by p. The function returns old. - */ - int __ovld atomic_xor(volatile __global int *p, int val); --unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_xor(volatile __global unsigned int *p, -+ unsigned int val); - int __ovld atomic_xor(volatile __local int *p, int val); --unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val); -+unsigned int __ovld atomic_xor(volatile __local unsigned int *p, -+ unsigned int val); - #ifdef __OPENCL_CPP_VERSION__ - int __ovld atomic_xor(volatile int *p, int val); - unsigned int __ovld atomic_xor(volatile unsigned int *p, unsigned int val); -@@ -13380,120 +13404,78 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v - #endif - - // atomic_init() -+#ifdef __opencl_c_generic_address_space - void __ovld atomic_init(volatile atomic_int *object, int value); - void __ovld atomic_init(volatile atomic_uint *object, uint value); - void __ovld atomic_init(volatile atomic_float *object, float value); - #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) - void __ovld atomic_init(volatile atomic_long *object, long value); - void __ovld atomic_init(volatile atomic_ulong *object, ulong value); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - void __ovld atomic_init(volatile atomic_double *object, double value); --#endif //cl_khr_fp64 --#endif -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // __opencl_c_generic_address_space -+ -+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+void __ovld atomic_init(volatile atomic_int __global *object, int value); -+void __ovld atomic_init(volatile atomic_int __local *object, int value); -+void __ovld atomic_init(volatile atomic_uint __global *object, uint value); -+void __ovld atomic_init(volatile atomic_uint __local *object, uint value); -+void __ovld atomic_init(volatile atomic_float __global *object, float value); -+void __ovld atomic_init(volatile atomic_float __local *object, float value); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+void __ovld atomic_init(volatile atomic_long __global *object, long value); -+void __ovld atomic_init(volatile atomic_long __local *object, long value); -+void __ovld atomic_init(volatile atomic_ulong __global *object, ulong value); -+void __ovld atomic_init(volatile atomic_ulong __local *object, ulong value); -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+void __ovld atomic_init(volatile atomic_double __global *object, double value); -+void __ovld atomic_init(volatile atomic_double __local *object, double value); -+#endif // cl_khr_fp64 -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) - - // atomic_work_item_fence() --void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope); -+void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, -+ memory_scope scope); - - // atomic_fetch() -- -+#if defined(__opencl_c_atomic_scope_device) && \ -+ defined(__opencl_c_atomic_order_seq_cst) -+#ifdef __opencl_c_generic_address_space - int __ovld atomic_fetch_add(volatile atomic_int *object, int operand); --int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order); --int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); - uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand); --uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order); --uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); - int __ovld atomic_fetch_sub(volatile atomic_int *object, int operand); --int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order); --int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); - uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand); --uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order); --uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); - int __ovld atomic_fetch_or(volatile atomic_int *object, int operand); --int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order); --int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); - uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand); --uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order); --uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); - int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand); --int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order); --int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); - uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand); --uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order); --uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); - int __ovld atomic_fetch_and(volatile atomic_int *object, int operand); --int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order); --int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); - uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand); --uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order); --uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); - int __ovld atomic_fetch_min(volatile atomic_int *object, int operand); --int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order); --int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); - uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand); --uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order); --uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); --uint __ovld atomic_fetch_min(volatile atomic_uint *object, int operand); --uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order); --uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope); - int __ovld atomic_fetch_max(volatile atomic_int *object, int operand); --int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order); --int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); - uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand); --uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order); --uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); --uint __ovld atomic_fetch_max(volatile atomic_uint *object, int operand); --uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order); --uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope); - - #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) - long __ovld atomic_fetch_add(volatile atomic_long *object, long operand); --long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order); --long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); - ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand); --ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); --ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); - long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand); --long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order); --long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); - ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand); --ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); --ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); - long __ovld atomic_fetch_or(volatile atomic_long *object, long operand); --long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order); --long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); - ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand); --ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); --ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); - long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand); --long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order); --long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); - ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand); --ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); --ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); - long __ovld atomic_fetch_and(volatile atomic_long *object, long operand); --long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order); --long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); - ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand); --ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); --ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); - long __ovld atomic_fetch_min(volatile atomic_long *object, long operand); --long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order); --long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); - ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand); --ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); --ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); --ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, long operand); --ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order); --ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope); - long __ovld atomic_fetch_max(volatile atomic_long *object, long operand); --long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order); --long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); - ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand); --ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); --ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); --ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, long operand); --ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order); --ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope); - #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) - - // OpenCL v2.0 s6.13.11.7.5: -@@ -13501,196 +13483,2239 @@ ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long opera - // or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. - - #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) --uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand); --uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); --uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); --uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand); --uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); --uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); -- --uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand); --uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); --uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); --uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand); --uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); --uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); --uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand); --uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); --uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); --uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t opermax); --uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder); --uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope); --uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t opermax); --uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder); --uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope); -- --intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand); --intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); --intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); --intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand); --intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); --intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); --intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand); --intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); --intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); --intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t opermax); --intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder); --intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); --intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t opermax); --intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder); --intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); -+uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, -+ ptrdiff_t operand); -+uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, -+ ptrdiff_t operand); -+ -+uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, -+ intptr_t operand); -+uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, -+ intptr_t operand); -+uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, -+ intptr_t operand); -+uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, -+ intptr_t opermax); -+uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, -+ intptr_t opermax); -+ -+intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, -+ uintptr_t operand); -+intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, -+ uintptr_t operand); -+intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, -+ uintptr_t operand); -+intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, -+ uintptr_t opermax); -+intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, -+ uintptr_t opermax); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // __opencl_c_generic_address_space -+ -+#if(__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+int __ovld atomic_fetch_add(volatile atomic_int __global *object, int operand); -+uint __ovld atomic_fetch_add(volatile atomic_uint __local *object, -+ uint operand); -+int __ovld atomic_fetch_sub(volatile atomic_int __global *object, int operand); -+int __ovld atomic_fetch_sub(volatile atomic_int __local *object, int operand); -+uint __ovld atomic_fetch_sub(volatile atomic_uint __local *object, -+ uint operand); -+uint __ovld atomic_fetch_sub(volatile atomic_uint __global *object, -+ uint operand); -+int __ovld atomic_fetch_or(volatile atomic_int __global *object, int operand); -+uint __ovld atomic_fetch_sub(volatile atomic_uint __local *object, -+ uint operand); -+uint __ovld atomic_fetch_or(volatile atomic_uint __global *object, -+ uint operand); -+uint __ovld atomic_fetch_or(volatile atomic_uint __local *object, uint operand); -+int __ovld atomic_fetch_xor(volatile atomic_int __global *object, int operand); -+int __ovld atomic_fetch_xor(volatile atomic_int __local *object, int operand); -+uint __ovld atomic_fetch_xor(volatile atomic_uint __global *object, -+ uint operand); -+uint __ovld atomic_fetch_xor(volatile atomic_uint __local *object, -+ uint operand); -+int __ovld atomic_fetch_and(volatile atomic_int __global *object, int operand); -+int __ovld atomic_fetch_and(volatile atomic_int __local *object, int operand); -+uint __ovld atomic_fetch_and(volatile atomic_uint __global *object, -+ uint operand); -+uint __ovld atomic_fetch_and(volatile atomic_uint __local *object, -+ uint operand); -+int __ovld atomic_fetch_min(volatile atomic_int __global *object, int operand); -+int __ovld atomic_fetch_min(volatile atomic_int __local *object, int operand); -+uint __ovld atomic_fetch_min(volatile atomic_uint __global *object, -+ uint operand); -+uint __ovld atomic_fetch_min(volatile atomic_uint __local *object, -+ uint operand); -+int __ovld atomic_fetch_max(volatile atomic_int __global *object, int operand); -+int __ovld atomic_fetch_max(volatile atomic_int __local *object, int operand); -+uint __ovld atomic_fetch_max(volatile atomic_uint __global *object, -+ uint operand); -+uint __ovld atomic_fetch_max(volatile atomic_uint __local *object, -+ uint operand); -+ -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+long __ovld atomic_fetch_add(volatile atomic_long __global *object, -+ long operand); -+long __ovld atomic_fetch_add(volatile atomic_long __local *object, -+ long operand); -+ulong __ovld atomic_fetch_add(volatile atomic_ulong __global *object, -+ ulong operand); -+ulong __ovld atomic_fetch_add(volatile atomic_ulong __local *object, -+ ulong operand); -+long __ovld atomic_fetch_sub(volatile atomic_long __global *object, -+ long operand); -+long __ovld atomic_fetch_sub(volatile atomic_long __local *object, -+ long operand); -+ulong __ovld atomic_fetch_sub(volatile atomic_ulong __global *object, -+ ulong operand); -+ulong __ovld atomic_fetch_sub(volatile atomic_ulong __local *object, -+ ulong operand); -+long __ovld atomic_fetch_or(volatile atomic_long __global *object, -+ long operand); -+long __ovld atomic_fetch_or(volatile atomic_long __local *object, long operand); -+ulong __ovld atomic_fetch_or(volatile atomic_ulong __global *object, -+ ulong operand); -+ulong __ovld atomic_fetch_or(volatile atomic_ulong __local *object, -+ ulong operand); -+long __ovld atomic_fetch_xor(volatile atomic_long __global *object, -+ long operand); -+long __ovld atomic_fetch_xor(volatile atomic_long __local *object, -+ long operand); -+ulong __ovld atomic_fetch_xor(volatile atomic_ulong __global *object, -+ ulong operand); -+ulong __ovld atomic_fetch_xor(volatile atomic_ulong __local *object, -+ ulong operand); -+long __ovld atomic_fetch_and(volatile atomic_long __global *object, -+ long operand); -+long __ovld atomic_fetch_and(volatile atomic_long __local *object, -+ long operand); -+ulong __ovld atomic_fetch_and(volatile atomic_ulong __global *object, -+ ulong operand); -+ulong __ovld atomic_fetch_and(volatile atomic_ulong __local *object, -+ ulong operand); -+long __ovld atomic_fetch_min(volatile atomic_long __global *object, -+ long operand); -+long __ovld atomic_fetch_min(volatile atomic_long __local *object, -+ long operand); -+ulong __ovld atomic_fetch_min(volatile atomic_ulong __global *object, -+ ulong operand); -+ulong __ovld atomic_fetch_min(volatile atomic_ulong __local *object, -+ ulong operand); -+long __ovld atomic_fetch_max(volatile atomic_long __global *object, -+ long operand); -+long __ovld atomic_fetch_max(volatile atomic_long __local *object, -+ long operand); -+ulong __ovld atomic_fetch_max(volatile atomic_ulong __global *object, -+ ulong operand); -+ulong __ovld atomic_fetch_max(volatile atomic_ulong __local *object, -+ ulong operand); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+ -+// OpenCL v2.0 s6.13.11.7.5: -+// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument -+// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be -+// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. -+ -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t __global *object, -+ ptrdiff_t operand); -+uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t __local *object, -+ ptrdiff_t operand); -+uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t __global *object, -+ ptrdiff_t operand); -+uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t __local *object, -+ ptrdiff_t operand); -+ -+uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t __global *object, -+ intptr_t operand); -+uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t __local *object, -+ intptr_t operand); -+uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t __global *object, -+ intptr_t operand); -+uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t __local *object, -+ intptr_t operand); -+uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t __global *object, -+ intptr_t operand); -+uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t __local *object, -+ intptr_t operand); -+uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t __global *object, -+ intptr_t opermax); -+uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t __local *object, -+ intptr_t opermax); -+uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t __global *object, -+ intptr_t opermax); -+uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t __local *object, -+ intptr_t opermax); -+ -+intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t __global *object, -+ uintptr_t operand); -+intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t __local *object, -+ uintptr_t operand); -+intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t __global *object, -+ uintptr_t operand); -+intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t __local *object, -+ uintptr_t operand); -+intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t __global *object, -+ uintptr_t operand); -+intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t __local *object, -+ uintptr_t operand); -+intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t __global *object, -+ uintptr_t opermax); -+intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t __local *object, -+ uintptr_t opermax); -+intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t __global *object, -+ uintptr_t opermax); -+intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t __local *object, -+ uintptr_t opermax); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+ -+#endif // defined(__opencl_c_atomic_scope_device) && -+ // defined(__opencl_c_atomic_order_seq_cst) -+ -+#ifdef __opencl_c_generic_address_space -+#ifdef __opencl_c_atomic_scope_device -+int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, -+ memory_order order); -+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, -+ uint operand, memory_order order); -+int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, -+ memory_order order); -+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, -+ uint operand, memory_order order); -+int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, -+ memory_order order); -+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, -+ memory_order order); -+int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, -+ memory_order order); -+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, -+ uint operand, memory_order order); -+int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, -+ memory_order order); -+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, -+ uint operand, memory_order order); -+int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, -+ memory_order order); -+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, -+ uint operand, memory_order order); -+int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, -+ memory_order order); -+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, -+ uint operand, memory_order order); -+#endif // __opencl_c_atomic_scope_device -+int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, -+ memory_order order, memory_scope scope); -+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, -+ memory_order order, memory_scope scope); -+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, -+ memory_order order, memory_scope scope); -+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, -+ memory_order order, memory_scope scope); -+int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, -+ memory_order order, memory_scope scope); -+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, -+ memory_order order, memory_scope scope); -+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, -+ memory_order order, memory_scope scope); -+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, -+ memory_order order, memory_scope scope); -+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+ -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#ifdef __opencl_c_atomic_scope_device -+long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order); -+long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order); -+long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, -+ memory_order order); -+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order); -+long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order); -+long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order); -+long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order); -+long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order); -+#endif // __opencl_c_atomic_scope_device -+long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, -+ memory_order order, memory_scope scope); -+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+ -+// OpenCL v2.0 s6.13.11.7.5: -+// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument -+// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be -+// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. -+ -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#ifdef __opencl_c_atomic_scope_device -+uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, -+ ptrdiff_t operand, -+ memory_order order); -+uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, -+ ptrdiff_t operand, -+ memory_order order); -+uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, -+ intptr_t operand, memory_order order); -+uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, -+ intptr_t operand, -+ memory_order order); -+uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, -+ intptr_t operand, -+ memory_order order); -+uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, -+ intptr_t opermax, -+ memory_order minder); -+uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, -+ intptr_t opermax, -+ memory_order minder); -+intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, -+ uintptr_t operand, memory_order order); -+intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, -+ uintptr_t operand, -+ memory_order order); -+intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, -+ uintptr_t operand, -+ memory_order order); -+intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, -+ uintptr_t opermax, -+ memory_order minder); -+intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, -+ uintptr_t opermax, -+ memory_order minder); -+#endif // __opencl_c_atomic_scope_device -+uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, -+ ptrdiff_t operand, -+ memory_order order, -+ memory_scope scope); -+uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, -+ ptrdiff_t operand, -+ memory_order order, -+ memory_scope scope); -+ -+uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, -+ intptr_t operand, memory_order order, -+ memory_scope scope); -+uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, -+ intptr_t operand, memory_order order, -+ memory_scope scope); -+uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, -+ intptr_t operand, memory_order order, -+ memory_scope scope); -+uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, -+ intptr_t opermax, -+ memory_order minder, -+ memory_scope scope); -+uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, -+ intptr_t opermax, -+ memory_order minder, -+ memory_scope scope); -+ -+intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, -+ uintptr_t operand, memory_order order, -+ memory_scope scope); -+intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, -+ uintptr_t operand, memory_order order, -+ memory_scope scope); -+intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, -+ uintptr_t operand, memory_order order, -+ memory_scope scope); -+intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, -+ uintptr_t opermax, -+ memory_order minder, -+ memory_scope scope); -+intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, -+ uintptr_t opermax, -+ memory_order minder, -+ memory_scope scope); - #endif -+#endif // __opencl_c_generic_address_space -+ -+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+#ifdef __opencl_c_atomic_scope_device -+int __ovld atomic_fetch_add_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order); -+int __ovld atomic_fetch_add_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order); -+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order); -+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order); -+int __ovld atomic_fetch_sub_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order); -+int __ovld atomic_fetch_sub_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order); -+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order); -+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order); -+int __ovld atomic_fetch_or_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order); -+int __ovld atomic_fetch_or_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order); -+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order); -+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order); -+int __ovld atomic_fetch_xor_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order); -+int __ovld atomic_fetch_xor_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order); -+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order); -+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order); -+int __ovld atomic_fetch_and_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order); -+int __ovld atomic_fetch_and_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order); -+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order); -+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order); -+int __ovld atomic_fetch_min_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order); -+int __ovld atomic_fetch_min_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order); -+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order); -+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order); -+int __ovld atomic_fetch_max_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order); -+int __ovld atomic_fetch_max_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order); -+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order); -+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order); -+#endif // __opencl_c_atomic_scope_device -+int __ovld atomic_fetch_add_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_add_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_sub_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_sub_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_or_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_or_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_xor_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_xor_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_and_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_and_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_min_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_min_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_max_explicit(volatile atomic_int __global *object, -+ int operand, memory_order order, -+ memory_scope scope); -+int __ovld atomic_fetch_max_explicit(volatile atomic_int __local *object, -+ int operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __global *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __local *object, -+ uint operand, memory_order order, -+ memory_scope scope); -+ -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#ifdef __opencl_c_atomic_scope_device -+long __ovld atomic_fetch_add_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order); -+long __ovld atomic_fetch_add_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order); -+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order); -+long __ovld atomic_fetch_sub_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order); -+long __ovld atomic_fetch_sub_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order); -+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order); -+long __ovld atomic_fetch_or_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order); -+long __ovld atomic_fetch_or_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order); -+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order); -+long __ovld atomic_fetch_xor_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order); -+long __ovld atomic_fetch_xor_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order); -+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order); -+long __ovld atomic_fetch_and_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order); -+long __ovld atomic_fetch_and_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order); -+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order); -+long __ovld atomic_fetch_min_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order); -+long __ovld atomic_fetch_min_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order); -+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order); -+long __ovld atomic_fetch_max_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order); -+long __ovld atomic_fetch_max_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order); -+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order); -+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order); -+#endif // __opencl_c_atomic_scope_device -+long __ovld atomic_fetch_add_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_add_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_sub_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_sub_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_or_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_or_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_xor_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_xor_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_and_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_and_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_min_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_min_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_max_explicit(volatile atomic_long __global *object, -+ long operand, memory_order order, -+ memory_scope scope); -+long __ovld atomic_fetch_max_explicit(volatile atomic_long __local *object, -+ long operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __global *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __local *object, -+ ulong operand, memory_order order, -+ memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+ -+// OpenCL v2.0 s6.13.11.7.5: -+// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument -+// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be -+// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. -+ -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#ifdef __opencl_c_atomic_scope_device -+uintptr_t __ovld -+atomic_fetch_add_explicit(volatile atomic_uintptr_t __global *object, -+ ptrdiff_t operand, memory_order order); -+uintptr_t __ovld -+atomic_fetch_add_explicit(volatile atomic_uintptr_t __local *object, -+ ptrdiff_t operand, memory_order order); -+uintptr_t __ovld -+atomic_fetch_sub_explicit(volatile atomic_uintptr_t __global *object, -+ ptrdiff_t operand, memory_order order); -+uintptr_t __ovld -+atomic_fetch_sub_explicit(volatile atomic_uintptr_t __local *object, -+ ptrdiff_t operand, memory_order order); -+uintptr_t __ovld -+atomic_fetch_or_explicit(volatile atomic_uintptr_t __global *object, -+ intptr_t operand, memory_order order); -+uintptr_t __ovld -+atomic_fetch_or_explicit(volatile atomic_uintptr_t __local *object, -+ intptr_t operand, memory_order order); -+uintptr_t __ovld -+atomic_fetch_xor_explicit(volatile atomic_uintptr_t __global *object, -+ intptr_t operand, memory_order order); -+uintptr_t __ovld -+atomic_fetch_xor_explicit(volatile atomic_uintptr_t __local *object, -+ intptr_t operand, memory_order order); -+uintptr_t __ovld -+atomic_fetch_and_explicit(volatile atomic_uintptr_t __global *object, -+ intptr_t operand, memory_order order); -+uintptr_t __ovld -+atomic_fetch_and_explicit(volatile atomic_uintptr_t __local *object, -+ intptr_t operand, memory_order order); -+uintptr_t __ovld -+atomic_fetch_min_explicit(volatile atomic_uintptr_t __global *object, -+ intptr_t opermax, memory_order minder); -+uintptr_t __ovld -+atomic_fetch_min_explicit(volatile atomic_uintptr_t __local *object, -+ intptr_t opermax, memory_order minder); -+uintptr_t __ovld -+atomic_fetch_max_explicit(volatile atomic_uintptr_t __global *object, -+ intptr_t opermax, memory_order minder); -+uintptr_t __ovld -+atomic_fetch_max_explicit(volatile atomic_uintptr_t __local *object, -+ intptr_t opermax, memory_order minder); -+intptr_t __ovld -+atomic_fetch_or_explicit(volatile atomic_intptr_t __global *object, -+ uintptr_t operand, memory_order order); -+intptr_t __ovld -+atomic_fetch_or_explicit(volatile atomic_intptr_t __local *object, -+ uintptr_t operand, memory_order order); -+intptr_t __ovld -+atomic_fetch_xor_explicit(volatile atomic_intptr_t __global *object, -+ uintptr_t operand, memory_order order); -+intptr_t __ovld -+atomic_fetch_xor_explicit(volatile atomic_intptr_t __local *object, -+ uintptr_t operand, memory_order order); -+intptr_t __ovld -+atomic_fetch_and_explicit(volatile atomic_intptr_t __global *object, -+ uintptr_t operand, memory_order order); -+intptr_t __ovld -+atomic_fetch_and_explicit(volatile atomic_intptr_t __local *object, -+ uintptr_t operand, memory_order order); -+intptr_t __ovld -+atomic_fetch_min_explicit(volatile atomic_intptr_t __global *object, -+ uintptr_t opermax, memory_order minder); -+intptr_t __ovld -+atomic_fetch_min_explicit(volatile atomic_intptr_t __local *object, -+ uintptr_t opermax, memory_order minder); -+intptr_t __ovld -+atomic_fetch_max_explicit(volatile atomic_intptr_t __global *object, -+ uintptr_t opermax, memory_order minder); -+intptr_t __ovld -+atomic_fetch_max_explicit(volatile atomic_intptr_t __local *object, -+ uintptr_t opermax, memory_order minder); -+#endif // __opencl_c_atomic_scope_device -+uintptr_t __ovld atomic_fetch_add_explicit( -+ volatile atomic_uintptr_t __global *object, ptrdiff_t operand, -+ memory_order order, memory_scope scope); -+uintptr_t __ovld atomic_fetch_add_explicit( -+ volatile atomic_uintptr_t __local *object, ptrdiff_t operand, -+ memory_order order, memory_scope scope); -+uintptr_t __ovld atomic_fetch_sub_explicit( -+ volatile atomic_uintptr_t __global *object, ptrdiff_t operand, -+ memory_order order, memory_scope scope); -+uintptr_t __ovld atomic_fetch_sub_explicit( -+ volatile atomic_uintptr_t __local *object, ptrdiff_t operand, -+ memory_order order, memory_scope scope); -+ -+uintptr_t __ovld atomic_fetch_or_explicit( -+ volatile atomic_uintptr_t __global *object, intptr_t operand, -+ memory_order order, memory_scope scope); -+uintptr_t __ovld atomic_fetch_or_explicit( -+ volatile atomic_uintptr_t __local *object, intptr_t operand, -+ memory_order order, memory_scope scope); -+uintptr_t __ovld atomic_fetch_xor_explicit( -+ volatile atomic_uintptr_t __global *object, intptr_t operand, -+ memory_order order, memory_scope scope); -+uintptr_t __ovld atomic_fetch_xor_explicit( -+ volatile atomic_uintptr_t __local *object, intptr_t operand, -+ memory_order order, memory_scope scope); -+uintptr_t __ovld atomic_fetch_and_explicit( -+ volatile atomic_uintptr_t __global *object, intptr_t operand, -+ memory_order order, memory_scope scope); -+uintptr_t __ovld atomic_fetch_and_explicit( -+ volatile atomic_uintptr_t __local *object, intptr_t operand, -+ memory_order order, memory_scope scope); -+uintptr_t __ovld atomic_fetch_min_explicit( -+ volatile atomic_uintptr_t __global *object, intptr_t opermax, -+ memory_order minder, memory_scope scope); -+uintptr_t __ovld atomic_fetch_min_explicit( -+ volatile atomic_uintptr_t __local *object, intptr_t opermax, -+ memory_order minder, memory_scope scope); -+uintptr_t __ovld atomic_fetch_max_explicit( -+ volatile atomic_uintptr_t __global *object, intptr_t opermax, -+ memory_order minder, memory_scope scope); -+uintptr_t __ovld atomic_fetch_max_explicit( -+ volatile atomic_uintptr_t __local *object, intptr_t opermax, -+ memory_order minder, memory_scope scope); -+ -+intptr_t __ovld atomic_fetch_or_explicit( -+ volatile atomic_intptr_t __global *object, uintptr_t operand, -+ memory_order order, memory_scope scope); -+intptr_t __ovld atomic_fetch_or_explicit( -+ volatile atomic_intptr_t __local *object, uintptr_t operand, -+ memory_order order, memory_scope scope); -+intptr_t __ovld atomic_fetch_xor_explicit( -+ volatile atomic_intptr_t __global *object, uintptr_t operand, -+ memory_order order, memory_scope scope); -+intptr_t __ovld atomic_fetch_xor_explicit( -+ volatile atomic_intptr_t __local *object, uintptr_t operand, -+ memory_order order, memory_scope scope); -+intptr_t __ovld atomic_fetch_and_explicit( -+ volatile atomic_intptr_t __global *object, uintptr_t operand, -+ memory_order order, memory_scope scope); -+intptr_t __ovld atomic_fetch_and_explicit( -+ volatile atomic_intptr_t __local *object, uintptr_t operand, -+ memory_order order, memory_scope scope); -+intptr_t __ovld atomic_fetch_min_explicit( -+ volatile atomic_intptr_t __global *object, uintptr_t opermax, -+ memory_order minder, memory_scope scope); -+intptr_t __ovld atomic_fetch_min_explicit( -+ volatile atomic_intptr_t __local *object, uintptr_t opermax, -+ memory_order minder, memory_scope scope); -+intptr_t __ovld atomic_fetch_max_explicit( -+ volatile atomic_intptr_t __global *object, uintptr_t opermax, -+ memory_order minder, memory_scope scope); -+intptr_t __ovld atomic_fetch_max_explicit( -+ volatile atomic_intptr_t __local *object, uintptr_t opermax, -+ memory_order minder, memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) - - // atomic_store() - -+#if defined(__opencl_c_atomic_scope_device) && \ -+ defined(__opencl_c_atomic_order_seq_cst) -+ -+#ifdef __opencl_c_generic_address_space - void __ovld atomic_store(volatile atomic_int *object, int desired); --void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order); --void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); - void __ovld atomic_store(volatile atomic_uint *object, uint desired); --void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order); --void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); - void __ovld atomic_store(volatile atomic_float *object, float desired); --void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order); --void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); - #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - void __ovld atomic_store(volatile atomic_double *object, double desired); --void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order); --void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); --#endif //cl_khr_fp64 -+#endif - void __ovld atomic_store(volatile atomic_long *object, long desired); --void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order); --void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); - void __ovld atomic_store(volatile atomic_ulong *object, ulong desired); --void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order); --void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // __opencl_c_generic_address_space -+ -+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+void __ovld atomic_store(volatile atomic_int __global *object, int desired); -+void __ovld atomic_store(volatile atomic_int __local *object, int desired); -+void __ovld atomic_store(volatile atomic_uint __global *object, uint desired); -+void __ovld atomic_store(volatile atomic_uint __local *object, uint desired); -+void __ovld atomic_store(volatile atomic_float __global *object, float desired); -+void __ovld atomic_store(volatile atomic_float __local *object, float desired); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+void __ovld atomic_store(volatile atomic_double __global *object, -+ double desired); -+void __ovld atomic_store(volatile atomic_double __local *object, -+ double desired); - #endif -+void __ovld atomic_store(volatile atomic_long __global *object, long desired); -+void __ovld atomic_store(volatile atomic_long __local *object, long desired); -+void __ovld atomic_store(volatile atomic_ulong __global *object, ulong desired); -+void __ovld atomic_store(volatile atomic_ulong __local *object, ulong desired); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+ -+#endif // defined(__opencl_c_atomic_scope_device) && -+ // defined(__opencl_c_atomic_order_seq_cst) -+ -+#ifdef __opencl_c_generic_address_space -+#ifdef __opencl_c_atomic_scope_device -+void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, -+ memory_order order); -+void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, -+ memory_order order); -+void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, -+ memory_order order); -+#endif // __opencl_c_atomic_scope_device -+void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, -+ memory_order order, memory_scope scope); -+void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, -+ memory_order order, memory_scope scope); -+void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, -+ memory_order order, memory_scope scope); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+void __ovld atomic_store_explicit(volatile atomic_double *object, -+ double desired, memory_order order); -+#endif //__opencl_c_atomic_scope_device -+void __ovld atomic_store_explicit(volatile atomic_double *object, -+ double desired, memory_order order, -+ memory_scope scope); -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, -+ memory_order order); -+void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, -+ memory_order order); -+#endif //__opencl_c_atomic_scope_device -+void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, -+ memory_order order, memory_scope scope); -+void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, -+ memory_order order, memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // __opencl_c_generic_address_space -+ -+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+#ifdef __opencl_c_atomic_scope_device -+void __ovld atomic_store_explicit(volatile atomic_int __global *object, -+ int desired, memory_order order); -+void __ovld atomic_store_explicit(volatile atomic_int __local *object, -+ int desired, memory_order order); -+void __ovld atomic_store_explicit(volatile atomic_uint __global *object, -+ uint desired, memory_order order); -+void __ovld atomic_store_explicit(volatile atomic_uint __local *object, -+ uint desired, memory_order order); -+void __ovld atomic_store_explicit(volatile atomic_float __global *object, -+ float desired, memory_order order); -+void __ovld atomic_store_explicit(volatile atomic_float __local *object, -+ float desired, memory_order order); -+#endif // __opencl_c_atomic_scope_device -+void __ovld atomic_store_explicit(volatile atomic_int __global *object, -+ int desired, memory_order order, -+ memory_scope scope); -+void __ovld atomic_store_explicit(volatile atomic_int __local *object, -+ int desired, memory_order order, -+ memory_scope scope); -+void __ovld atomic_store_explicit(volatile atomic_uint __global *object, -+ uint desired, memory_order order, -+ memory_scope scope); -+void __ovld atomic_store_explicit(volatile atomic_uint __local *object, -+ uint desired, memory_order order, -+ memory_scope scope); -+void __ovld atomic_store_explicit(volatile atomic_float __global *object, -+ float desired, memory_order order, -+ memory_scope scope); -+void __ovld atomic_store_explicit(volatile atomic_float __local *object, -+ float desired, memory_order order, -+ memory_scope scope); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+void __ovld atomic_store_explicit(volatile atomic_double __global *object, -+ double desired, memory_order order); -+void __ovld atomic_store_explicit(volatile atomic_double __local *object, -+ double desired, memory_order order); -+#endif //__opencl_c_atomic_scope_device -+void __ovld atomic_store_explicit(volatile atomic_double __global *object, -+ double desired, memory_order order, -+ memory_scope scope); -+void __ovld atomic_store_explicit(volatile atomic_double __local *object, -+ double desired, memory_order order, -+ memory_scope scope); -+#endif // cl_khr_fp64 -+#ifdef __opencl_c_atomic_scope_device -+void __ovld atomic_store_explicit(volatile atomic_long __global *object, -+ long desired, memory_order order); -+void __ovld atomic_store_explicit(volatile atomic_long __local *object, -+ long desired, memory_order order); -+void __ovld atomic_store_explicit(volatile atomic_ulong __global *object, -+ ulong desired, memory_order order); -+void __ovld atomic_store_explicit(volatile atomic_ulong __local *object, -+ ulong desired, memory_order order); -+#endif // __opencl_c_atomic_scope_device -+void __ovld atomic_store_explicit(volatile atomic_long __global *object, -+ long desired, memory_order order, -+ memory_scope scope); -+void __ovld atomic_store_explicit(volatile atomic_long __local *object, -+ long desired, memory_order order, -+ memory_scope scope); -+void __ovld atomic_store_explicit(volatile atomic_ulong __global *object, -+ ulong desired, memory_order order, -+ memory_scope scope); -+void __ovld atomic_store_explicit(volatile atomic_ulong __local *object, -+ ulong desired, memory_order order, -+ memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) - - // atomic_load() -- -+#ifdef __opencl_c_generic_address_space -+#if defined(__opencl_c_atomic_scope_device) && \ -+ defined(__opencl_c_atomic_order_seq_cst) - int __ovld atomic_load(volatile atomic_int *object); --int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order); --int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope); - uint __ovld atomic_load(volatile atomic_uint *object); --uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order); --uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope); - float __ovld atomic_load(volatile atomic_float *object); --float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order); --float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope); - #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld atomic_load(volatile atomic_double *object); --double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order); --double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope); --#endif //cl_khr_fp64 -+#endif // cl_khr_fp64 - long __ovld atomic_load(volatile atomic_long *object); --long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order); --long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope); - ulong __ovld atomic_load(volatile atomic_ulong *object); --ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order); --ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope); --#endif -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // defined(__opencl_c_atomic_scope_device) && -+ // defined(__opencl_c_atomic_order_seq_cst) -+#endif //__opencl_c_generic_address_space -+ -+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+#if defined(__opencl_c_atomic_scope_device) && \ -+ defined(__opencl_c_atomic_order_seq_cst) -+int __ovld atomic_load(volatile atomic_int __global *object); -+int __ovld atomic_load(volatile atomic_int __local *object); -+uint __ovld atomic_load(volatile atomic_uint __global *object); -+uint __ovld atomic_load(volatile atomic_uint __local *object); -+float __ovld atomic_load(volatile atomic_float __global *object); -+float __ovld atomic_load(volatile atomic_float __local *object); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+double __ovld atomic_load(volatile atomic_double __global *object); -+double __ovld atomic_load(volatile atomic_double __local *object); -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+long __ovld atomic_load(volatile atomic_long __global *object); -+long __ovld atomic_load(volatile atomic_long __local *object); -+ulong __ovld atomic_load(volatile atomic_ulong __global *object); -+ulong __ovld atomic_load(volatile atomic_ulong __local *object); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // defined(__opencl_c_atomic_scope_device) && -+ // defined(__opencl_c_atomic_order_seq_cst) -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+ -+#ifdef __opencl_c_generic_address_space -+#ifdef __opencl_c_atomic_scope_device -+int __ovld atomic_load_explicit(volatile atomic_int *object, -+ memory_order order); -+uint __ovld atomic_load_explicit(volatile atomic_uint *object, -+ memory_order order); -+float __ovld atomic_load_explicit(volatile atomic_float *object, -+ memory_order order); -+#endif // __opencl_c_atomic_scope_device -+int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_load_explicit(volatile atomic_uint *object, -+ memory_order order, memory_scope scope); -+float __ovld atomic_load_explicit(volatile atomic_float *object, -+ memory_order order, memory_scope scope); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+double __ovld atomic_load_explicit(volatile atomic_double *object, -+ memory_order order); -+#endif // __opencl_c_atomic_scope_device -+double __ovld atomic_load_explicit(volatile atomic_double *object, -+ memory_order order, memory_scope scope); -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+long __ovld atomic_load_explicit(volatile atomic_long *object, -+ memory_order order); -+ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, -+ memory_order order); -+#endif //__opencl_c_atomic_scope_device -+long __ovld atomic_load_explicit(volatile atomic_long *object, -+ memory_order order, memory_scope scope); -+ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, -+ memory_order order, memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // __opencl_c_generic_address_space -+ -+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+#ifdef __opencl_c_atomic_scope_device -+int __ovld atomic_load_explicit(volatile atomic_int __global *object, -+ memory_order order); -+int __ovld atomic_load_explicit(volatile atomic_int __local *object, -+ memory_order order); -+uint __ovld atomic_load_explicit(volatile atomic_uint __global *object, -+ memory_order order); -+uint __ovld atomic_load_explicit(volatile atomic_uint __local *object, -+ memory_order order); -+float __ovld atomic_load_explicit(volatile atomic_float __global *object, -+ memory_order order); -+float __ovld atomic_load_explicit(volatile atomic_float __local *object, -+ memory_order order); -+#endif // __opencl_c_atomic_scope_device -+int __ovld atomic_load_explicit(volatile atomic_int __global *object, -+ memory_order order, memory_scope scope); -+int __ovld atomic_load_explicit(volatile atomic_int __local *object, -+ memory_order order, memory_scope scope); -+uint __ovld atomic_load_explicit(volatile atomic_uint __global *object, -+ memory_order order, memory_scope scope); -+uint __ovld atomic_load_explicit(volatile atomic_uint __local *object, -+ memory_order order, memory_scope scope); -+float __ovld atomic_load_explicit(volatile atomic_float __global *object, -+ memory_order order, memory_scope scope); -+float __ovld atomic_load_explicit(volatile atomic_float __local *object, -+ memory_order order, memory_scope scope); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+double __ovld atomic_load_explicit(volatile atomic_double __global *object, -+ memory_order order); -+double __ovld atomic_load_explicit(volatile atomic_double __local *object, -+ memory_order order); -+#endif // __opencl_c_atomic_scope_device -+double __ovld atomic_load_explicit(volatile atomic_double __global *object, -+ memory_order order, memory_scope scope); -+double __ovld atomic_load_explicit(volatile atomic_double __local *object, -+ memory_order order, memory_scope scope); -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+long __ovld atomic_load_explicit(volatile atomic_long __global *object, -+ memory_order order); -+long __ovld atomic_load_explicit(volatile atomic_long __local *object, -+ memory_order order); -+ulong __ovld atomic_load_explicit(volatile atomic_ulong __global *object, -+ memory_order order); -+ulong __ovld atomic_load_explicit(volatile atomic_ulong __local *object, -+ memory_order order); -+#endif // __opencl_c_atomic_scope_device -+long __ovld atomic_load_explicit(volatile atomic_long __global *object, -+ memory_order order, memory_scope scope); -+long __ovld atomic_load_explicit(volatile atomic_long __local *object, -+ memory_order order, memory_scope scope); -+ulong __ovld atomic_load_explicit(volatile atomic_ulong __global *object, -+ memory_order order, memory_scope scope); -+ulong __ovld atomic_load_explicit(volatile atomic_ulong __local *object, -+ memory_order order, memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) - - // atomic_exchange() -- -+#ifdef __opencl_c_generic_address_space -+#if defined(__opencl_c_atomic_scope_device) && \ -+ defined(__opencl_c_atomic_order_seq_cst) - int __ovld atomic_exchange(volatile atomic_int *object, int desired); --int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order); --int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); - uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired); --uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order); --uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); - float __ovld atomic_exchange(volatile atomic_float *object, float desired); --float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order); --float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); - #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld atomic_exchange(volatile atomic_double *object, double desired); --double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order); --double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); --#endif //cl_khr_fp64 -+#endif - long __ovld atomic_exchange(volatile atomic_long *object, long desired); --long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order); --long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); - ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired); --ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order); --ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // defined(__opencl_c_atomic_scope_device) && -+ // defined(__opencl_c_atomic_order_seq_cst) -+#endif // __opencl_c_generic_address_space -+ -+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+#if defined(__opencl_c_atomic_scope_device) && \ -+ defined(__opencl_c_atomic_order_seq_cst) -+int __ovld atomic_exchange(volatile atomic_int __global *object, int desired); -+int __ovld atomic_exchange(volatile atomic_int __local *object, int desired); -+uint __ovld atomic_exchange(volatile atomic_uint __global *object, -+ uint desired); -+uint __ovld atomic_exchange(volatile atomic_uint __local *object, uint desired); -+float __ovld atomic_exchange(volatile atomic_float __global *object, -+ float desired); -+float __ovld atomic_exchange(volatile atomic_float __local *object, -+ float desired); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+double __ovld atomic_exchange(volatile atomic_double __global *object, -+ double desired); -+double __ovld atomic_exchange(volatile atomic_double __local *object, -+ double desired); - #endif -+long __ovld atomic_exchange(volatile atomic_long __global *object, -+ long desired); -+long __ovld atomic_exchange(volatile atomic_long __local *object, long desired); -+ulong __ovld atomic_exchange(volatile atomic_ulong __global *object, -+ ulong desired); -+ulong __ovld atomic_exchange(volatile atomic_ulong __local *object, -+ ulong desired); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // defined(__opencl_c_atomic_scope_device) && -+ // defined(__opencl_c_atomic_order_seq_cst) -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+ -+#ifdef __opencl_c_generic_address_space -+#ifdef __opencl_c_atomic_scope_device -+int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, -+ memory_order order); -+uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, -+ memory_order order); -+float __ovld atomic_exchange_explicit(volatile atomic_float *object, -+ float desired, memory_order order); -+#endif // __opencl_c_atomic_scope_device -+int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, -+ memory_order order, memory_scope scope); -+uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, -+ memory_order order, memory_scope scope); -+float __ovld atomic_exchange_explicit(volatile atomic_float *object, -+ float desired, memory_order order, -+ memory_scope scope); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+double __ovld atomic_exchange_explicit(volatile atomic_double *object, -+ double desired, memory_order order); -+#endif // __opencl_c_atomic_scope_device -+double __ovld atomic_exchange_explicit(volatile atomic_double *object, -+ double desired, memory_order order, -+ memory_scope scope); -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, -+ memory_order order); -+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, -+ ulong desired, memory_order order); -+#endif // __opencl_c_atomic_scope_device -+long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, -+ memory_order order, memory_scope scope); -+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, -+ ulong desired, memory_order order, -+ memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // __opencl_c_generic_address_space -+ -+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+#ifdef __opencl_c_atomic_scope_device -+int __ovld atomic_exchange_explicit(volatile atomic_int __global *object, -+ int desired, memory_order order); -+int __ovld atomic_exchange_explicit(volatile atomic_int __local *object, -+ int desired, memory_order order); -+uint __ovld atomic_exchange_explicit(volatile atomic_uint __global *object, -+ uint desired, memory_order order); -+uint __ovld atomic_exchange_explicit(volatile atomic_uint __local *object, -+ uint desired, memory_order order); -+float __ovld atomic_exchange_explicit(volatile atomic_float __global *object, -+ float desired, memory_order order); -+float __ovld atomic_exchange_explicit(volatile atomic_float __local *object, -+ float desired, memory_order order); -+#endif // __opencl_c_atomic_scope_device -+int __ovld atomic_exchange_explicit(volatile atomic_int __global *object, -+ int desired, memory_order order, -+ memory_scope scope); -+int __ovld atomic_exchange_explicit(volatile atomic_int __local *object, -+ int desired, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_exchange_explicit(volatile atomic_uint __global *object, -+ uint desired, memory_order order, -+ memory_scope scope); -+uint __ovld atomic_exchange_explicit(volatile atomic_uint __local *object, -+ uint desired, memory_order order, -+ memory_scope scope); -+float __ovld atomic_exchange_explicit(volatile atomic_float __global *object, -+ float desired, memory_order order, -+ memory_scope scope); -+float __ovld atomic_exchange_explicit(volatile atomic_float __local *object, -+ float desired, memory_order order, -+ memory_scope scope); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+double __ovld atomic_exchange_explicit(volatile atomic_double __global *object, -+ double desired, memory_order order); -+double __ovld atomic_exchange_explicit(volatile atomic_double __local *object, -+ double desired, memory_order order); -+#endif // __opencl_c_atomic_scope_device -+double __ovld atomic_exchange_explicit(volatile atomic_double __global *object, -+ double desired, memory_order order, -+ memory_scope scope); -+double __ovld atomic_exchange_explicit(volatile atomic_double __local *object, -+ double desired, memory_order order, -+ memory_scope scope); -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+long __ovld atomic_exchange_explicit(volatile atomic_long __global *object, -+ long desired, memory_order order); -+long __ovld atomic_exchange_explicit(volatile atomic_long __local *object, -+ long desired, memory_order order); -+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __global *object, -+ ulong desired, memory_order order); -+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __local *object, -+ ulong desired, memory_order order); -+#endif // __opencl_c_atomic_scope_device -+long __ovld atomic_exchange_explicit(volatile atomic_long __global *object, -+ long desired, memory_order order, -+ memory_scope scope); -+long __ovld atomic_exchange_explicit(volatile atomic_long __local *object, -+ long desired, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __global *object, -+ ulong desired, memory_order order, -+ memory_scope scope); -+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __local *object, -+ ulong desired, memory_order order, -+ memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) - - // atomic_compare_exchange_strong() and atomic_compare_exchange_weak() -- --bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired); --bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, -- int desired, memory_order success, memory_order failure); --bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, -- int desired, memory_order success, memory_order failure, memory_scope scope); --bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired); --bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected, -- uint desired, memory_order success, memory_order failure); --bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected, -- uint desired, memory_order success, memory_order failure, memory_scope scope); --bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired); --bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, -- int desired, memory_order success, memory_order failure); --bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, -- int desired, memory_order success, memory_order failure, memory_scope scope); --bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired); --bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, -- uint desired, memory_order success, memory_order failure); --bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, -- uint desired, memory_order success, memory_order failure, memory_scope scope); --bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired); --bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected, -- float desired, memory_order success, memory_order failure); --bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected, -- float desired, memory_order success, memory_order failure, memory_scope scope); --bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired); --bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, -- float desired, memory_order success, memory_order failure); --bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, -- float desired, memory_order success, memory_order failure, memory_scope scope); -+#ifdef __opencl_c_generic_address_space -+#if defined(__opencl_c_atomic_scope_device) && \ -+ defined(__opencl_c_atomic_order_seq_cst) -+bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, -+ int *expected, int desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, -+ uint *expected, uint desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, -+ int *expected, int desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, -+ uint *expected, uint desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, -+ float *expected, float desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, -+ float *expected, float desired); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, -+ double *expected, double desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, -+ double *expected, double desired); -+#endif -+bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, -+ long *expected, long desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, -+ long *expected, long desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, -+ ulong *expected, ulong desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, -+ ulong *expected, ulong desired); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // defined(__opencl_c_atomic_scope_device) && -+ // defined(__opencl_c_atomic_order_seq_cst) -+#endif // __opencl_c_generic_address_space -+ -+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+#if defined(__opencl_c_atomic_scope_device) && \ -+ defined(__opencl_c_atomic_order_seq_cst) -+bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, -+ int __global *expected, int desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, -+ int __global *expected, int desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, -+ int __local *expected, int desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, -+ int __local *expected, int desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, -+ int __private *expected, -+ int desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, -+ int __private *expected, -+ int desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_uint __global *object, -+ uint __global *expected, uint desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, -+ uint __global *expected, -+ uint desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_uint __global *object, -+ uint __local *expected, uint desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, -+ uint __local *expected, -+ uint desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_uint __global *object, -+ uint __private *expected, uint desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, -+ uint __private *expected, -+ uint desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, -+ int __global *expected, int desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, -+ int __global *expected, int desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, -+ int __local *expected, int desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, -+ int __local *expected, int desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, -+ int __private *expected, int desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, -+ int __private *expected, int desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, -+ uint __global *expected, uint desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, -+ uint __global *expected, uint desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, -+ uint __local *expected, uint desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, -+ uint __local *expected, uint desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, -+ uint __private *expected, -+ uint desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, -+ uint __private *expected, -+ uint desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_float __global *object, -+ float __global *expected, float desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_float __local *object, -+ float __global *expected, float desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_float __global *object, -+ float __local *expected, float desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_float __local *object, -+ float __local *expected, float desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_float __global *object, -+ float __private *expected, float desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_float __local *object, -+ float __private *expected, float desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, -+ float __global *expected, -+ float desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, -+ float __global *expected, -+ float desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, -+ float __local *expected, -+ float desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, -+ float __local *expected, -+ float desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, -+ float __private *expected, -+ float desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, -+ float __private *expected, -+ float desired); - #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) --#ifdef cl_khr_fp64 --bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired); --bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, -- double desired, memory_order success, memory_order failure); --bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, -- double desired, memory_order success, memory_order failure, memory_scope scope); --bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired); --bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, -- double desired, memory_order success, memory_order failure); --bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, -- double desired, memory_order success, memory_order failure, memory_scope scope); --#endif //cl_khr_fp64 --bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired); --bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, -- long desired, memory_order success, memory_order failure); --bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, -- long desired, memory_order success, memory_order failure, memory_scope scope); --bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired); --bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, -- long desired, memory_order success, memory_order failure); --bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, -- long desired, memory_order success, memory_order failure, memory_scope scope); --bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired); --bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, -- ulong desired, memory_order success, memory_order failure); --bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, -- ulong desired, memory_order success, memory_order failure, memory_scope scope); --bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired); --bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected, -- ulong desired, memory_order success, memory_order failure); --bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected, -- ulong desired, memory_order success, memory_order failure, memory_scope scope); -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_double __global *object, -+ double __global *expected, double desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_double __local *object, -+ double __global *expected, double desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_double __global *object, -+ double __local *expected, double desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_double __local *object, -+ double __local *expected, double desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_double __global *object, -+ double __private *expected, double desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_double __local *object, -+ double __private *expected, double desired); -+bool __ovld -+atomic_compare_exchange_weak(volatile atomic_double __global *object, -+ double __global *expected, double desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, -+ double __global *expected, -+ double desired); -+bool __ovld -+atomic_compare_exchange_weak(volatile atomic_double __global *object, -+ double __local *expected, double desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, -+ double __local *expected, -+ double desired); -+bool __ovld -+atomic_compare_exchange_weak(volatile atomic_double __global *object, -+ double __private *expected, double desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, -+ double __private *expected, -+ double desired); - #endif -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_long __global *object, -+ long __global *expected, long desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, -+ long __global *expected, -+ long desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_long __global *object, -+ long __local *expected, long desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, -+ long __local *expected, -+ long desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_long __global *object, -+ long __private *expected, long desired); -+bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, -+ long __private *expected, -+ long desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, -+ long __global *expected, long desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, -+ long __global *expected, long desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, -+ long __local *expected, long desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, -+ long __local *expected, long desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, -+ long __private *expected, -+ long desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, -+ long __private *expected, -+ long desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_ulong __global *object, -+ ulong __global *expected, ulong desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_ulong __local *object, -+ ulong __global *expected, ulong desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_ulong __global *object, -+ ulong __local *expected, ulong desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_ulong __local *object, -+ ulong __local *expected, ulong desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_ulong __global *object, -+ ulong __private *expected, ulong desired); -+bool __ovld -+atomic_compare_exchange_strong(volatile atomic_ulong __local *object, -+ ulong __private *expected, ulong desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, -+ ulong __global *expected, -+ ulong desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, -+ ulong __global *expected, -+ ulong desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, -+ ulong __local *expected, -+ ulong desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, -+ ulong __local *expected, -+ ulong desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, -+ ulong __private *expected, -+ ulong desired); -+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, -+ ulong __private *expected, -+ ulong desired); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // defined(__opencl_c_atomic_scope_device) && -+ // defined(__opencl_c_atomic_order_seq_cst) -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+ -+#ifdef __opencl_c_generic_address_space -+#ifdef __opencl_c_atomic_scope_device -+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, -+ int *expected, int desired, -+ memory_order success, -+ memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint *object, uint *expected, uint desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, -+ int *expected, int desired, -+ memory_order success, -+ memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, -+ uint *expected, uint desired, -+ memory_order success, -+ memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float *object, float *expected, float desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, -+ float *expected, -+ float desired, -+ memory_order success, -+ memory_order failure); -+#endif // __opencl_c_atomic_scope_device -+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, -+ int *expected, int desired, -+ memory_order success, -+ memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint *object, uint *expected, uint desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, -+ int *expected, int desired, -+ memory_order success, -+ memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, -+ uint *expected, uint desired, -+ memory_order success, -+ memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float *object, float *expected, float desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float *object, float *expected, float desired, -+ memory_order success, memory_order failure, memory_scope scope); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double *object, double *expected, double desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double *object, double *expected, double desired, -+ memory_order success, memory_order failure); -+#endif // __opencl_c_atomic_scope_device -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double *object, double *expected, double desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double *object, double *expected, double desired, -+ memory_order success, memory_order failure, memory_scope scope); -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long *object, long *expected, long desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, -+ long *expected, long desired, -+ memory_order success, -+ memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong *object, ulong *expected, ulong desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, -+ ulong *expected, -+ ulong desired, -+ memory_order success, -+ memory_order failure); -+#endif // __opencl_c_atomic_scope_device -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long *object, long *expected, long desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, -+ long *expected, long desired, -+ memory_order success, -+ memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong *object, ulong *expected, ulong desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong *object, ulong *expected, ulong desired, -+ memory_order success, memory_order failure, memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // __opencl_c_generic_address_space -+ -+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+#ifdef __opencl_c_atomic_scope_device -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_int __global *object, int __global *expected, int desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_int __local *object, int __global *expected, int desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_int __global *object, int __local *expected, int desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_int __local *object, int __local *expected, int desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_int __global *object, int __private *expected, int desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_int __local *object, int __private *expected, int desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint __global *object, uint __global *expected, -+ uint desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint __local *object, uint __global *expected, uint desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint __global *object, uint __local *expected, uint desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint __local *object, uint __local *expected, uint desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint __global *object, uint __private *expected, -+ uint desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint __local *object, uint __private *expected, -+ uint desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_int __global *object, int __global *expected, int desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_int __local *object, int __global *expected, int desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_int __global *object, int __local *expected, int desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_int __local *object, int __local *expected, int desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_int __global *object, int __private *expected, int desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_int __local *object, int __private *expected, int desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_uint __global *object, uint __global *expected, -+ uint desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_uint __local *object, uint __global *expected, uint desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_uint __global *object, uint __local *expected, uint desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_uint __local *object, uint __local *expected, uint desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_uint __global *object, uint __private *expected, -+ uint desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_uint __local *object, uint __private *expected, -+ uint desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float __global *object, float __global *expected, -+ float desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float __local *object, float __global *expected, -+ float desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float __global *object, float __local *expected, -+ float desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float __local *object, float __local *expected, -+ float desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float __global *object, float __private *expected, -+ float desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float __local *object, float __private *expected, -+ float desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float __global *object, float __global *expected, -+ float desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float __local *object, float __global *expected, -+ float desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float __global *object, float __local *expected, -+ float desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float __local *object, float __local *expected, -+ float desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float __global *object, float __private *expected, -+ float desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float __local *object, float __private *expected, -+ float desired, memory_order success, memory_order failure); -+#endif // __opencl_c_atomic_scope_device -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_int __global *object, int __global *expected, int desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_int __local *object, int __global *expected, int desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_int __global *object, int __local *expected, int desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_int __local *object, int __local *expected, int desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_int __global *object, int __private *expected, int desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_int __local *object, int __private *expected, int desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint __global *object, uint __global *expected, -+ uint desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint __local *object, uint __global *expected, uint desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint __global *object, uint __local *expected, uint desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint __local *object, uint __local *expected, uint desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint __global *object, uint __private *expected, -+ uint desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_uint __local *object, uint __private *expected, -+ uint desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_int __global *object, int __global *expected, int desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_int __local *object, int __global *expected, int desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_int __global *object, int __local *expected, int desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_int __local *object, int __local *expected, int desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_int __global *object, int __private *expected, int desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_int __local *object, int __private *expected, int desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_uint __global *object, uint __global *expected, -+ uint desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_uint __local *object, uint __global *expected, uint desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_uint __global *object, uint __local *expected, uint desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_uint __local *object, uint __local *expected, uint desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_uint __global *object, uint __private *expected, -+ uint desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_uint __local *object, uint __private *expected, -+ uint desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float __global *object, float __global *expected, -+ float desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float __local *object, float __global *expected, -+ float desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float __global *object, float __local *expected, -+ float desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float __local *object, float __local *expected, -+ float desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float __global *object, float __private *expected, -+ float desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_float __local *object, float __private *expected, -+ float desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float __global *object, float __global *expected, -+ float desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float __local *object, float __global *expected, -+ float desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float __global *object, float __local *expected, -+ float desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float __local *object, float __local *expected, -+ float desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float __global *object, float __private *expected, -+ float desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_float __local *object, float __private *expected, -+ float desired, memory_order success, memory_order failure, -+ memory_scope scope); -+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double __global *object, double __global *expected, -+ double desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double __local *object, double __global *expected, -+ double desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double __global *object, double __local *expected, -+ double desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double __local *object, double __local *expected, -+ double desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double __global *object, double __private *expected, -+ double desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double __local *object, double __private *expected, -+ double desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double __global *object, double __global *expected, -+ double desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double __local *object, double __global *expected, -+ double desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double __global *object, double __local *expected, -+ double desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double __local *object, double __local *expected, -+ double desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double __global *object, double __private *expected, -+ double desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double __local *object, double __private *expected, -+ double desired, memory_order success, memory_order failure); -+#endif // __opencl_c_atomic_scope_device -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double __global *object, double __global *expected, -+ double desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double __local *object, double __global *expected, -+ double desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double __global *object, double __local *expected, -+ double desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double __local *object, double __local *expected, -+ double desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double __global *object, double __private *expected, -+ double desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_double __local *object, double __private *expected, -+ double desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double __global *object, double __global *expected, -+ double desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double __local *object, double __global *expected, -+ double desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double __global *object, double __local *expected, -+ double desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double __local *object, double __local *expected, -+ double desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double __global *object, double __private *expected, -+ double desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_double __local *object, double __private *expected, -+ double desired, memory_order success, memory_order failure, -+ memory_scope scope); -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#ifdef __opencl_c_atomic_scope_device -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long __global *object, long __global *expected, -+ long desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long __local *object, long __global *expected, long desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long __global *object, long __local *expected, long desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long __local *object, long __local *expected, long desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long __global *object, long __private *expected, -+ long desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long __local *object, long __private *expected, -+ long desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_long __global *object, long __global *expected, -+ long desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_long __local *object, long __global *expected, long desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_long __global *object, long __local *expected, long desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_long __local *object, long __local *expected, long desired, -+ memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_long __global *object, long __private *expected, -+ long desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_long __local *object, long __private *expected, -+ long desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong __global *object, ulong __global *expected, -+ ulong desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong __local *object, ulong __global *expected, -+ ulong desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong __global *object, ulong __local *expected, -+ ulong desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong __local *object, ulong __local *expected, -+ ulong desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong __global *object, ulong __private *expected, -+ ulong desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong __local *object, ulong __private *expected, -+ ulong desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong __global *object, ulong __global *expected, -+ ulong desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong __local *object, ulong __global *expected, -+ ulong desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong __global *object, ulong __local *expected, -+ ulong desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong __local *object, ulong __local *expected, -+ ulong desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong __global *object, ulong __private *expected, -+ ulong desired, memory_order success, memory_order failure); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong __local *object, ulong __private *expected, -+ ulong desired, memory_order success, memory_order failure); -+#endif // __opencl_c_atomic_scope_device -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long __global *object, long __global *expected, -+ long desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long __local *object, long __global *expected, long desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long __global *object, long __local *expected, long desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long __local *object, long __local *expected, long desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long __global *object, long __private *expected, -+ long desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_long __local *object, long __private *expected, -+ long desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_long __global *object, long __global *expected, -+ long desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_long __local *object, long __global *expected, long desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_long __global *object, long __local *expected, long desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_long __local *object, long __local *expected, long desired, -+ memory_order success, memory_order failure, memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_long __global *object, long __private *expected, -+ long desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_long __local *object, long __private *expected, -+ long desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong __global *object, ulong __global *expected, -+ ulong desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong __local *object, ulong __global *expected, -+ ulong desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong __global *object, ulong __local *expected, -+ ulong desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong __local *object, ulong __local *expected, -+ ulong desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong __global *object, ulong __private *expected, -+ ulong desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_strong_explicit( -+ volatile atomic_ulong __local *object, ulong __private *expected, -+ ulong desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong __global *object, ulong __global *expected, -+ ulong desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong __local *object, ulong __global *expected, -+ ulong desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong __global *object, ulong __local *expected, -+ ulong desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong __local *object, ulong __local *expected, -+ ulong desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong __global *object, ulong __private *expected, -+ ulong desired, memory_order success, memory_order failure, -+ memory_scope scope); -+bool __ovld atomic_compare_exchange_weak_explicit( -+ volatile atomic_ulong __local *object, ulong __private *expected, -+ ulong desired, memory_order success, memory_order failure, -+ memory_scope scope); -+#endif // defined(cl_khr_int64_base_atomics) && -+ // defined(cl_khr_int64_extended_atomics) -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) - - // atomic_flag_test_and_set() and atomic_flag_clear() -- -+#if defined(__opencl_c_atomic_scope_device) && \ -+ defined(__opencl_c_atomic_order_seq_cst) -+#ifdef __opencl_c_generic_address_space - bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object); --bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order); --bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); - void __ovld atomic_flag_clear(volatile atomic_flag *object); --void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order); --void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); -+#endif // __opencl_c_generic_address_space -+ -+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+bool __ovld atomic_flag_test_and_set(volatile atomic_flag __global *object); -+bool __ovld atomic_flag_test_and_set(volatile atomic_flag __local *object); -+void __ovld atomic_flag_clear(volatile atomic_flag __global *object); -+void __ovld atomic_flag_clear(volatile atomic_flag __local *object); -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+#endif // defined(__opencl_c_atomic_scope_device) && -+ // defined(__opencl_c_atomic_order_seq_cst) -+ -+#ifdef __opencl_c_generic_address_space -+#ifdef __opencl_c_atomic_scope_device -+bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, -+ memory_order order); -+void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, -+ memory_order order); -+#endif // __opencl_c_atomic_scope_device -+bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, -+ memory_order order, -+ memory_scope scope); -+void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, -+ memory_order order, memory_scope scope); -+#endif // __opencl_c_generic_address_space -+ -+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) -+#ifdef __opencl_c_atomic_scope_device -+bool __ovld atomic_flag_test_and_set_explicit( -+ volatile atomic_flag __global *object, memory_order order); -+bool __ovld atomic_flag_test_and_set_explicit( -+ volatile atomic_flag __local *object, memory_order order); -+void __ovld atomic_flag_clear_explicit(volatile atomic_flag __global *object, -+ memory_order order); -+void __ovld atomic_flag_clear_explicit(volatile atomic_flag __local *object, -+ memory_order order); -+#endif // __opencl_c_atomic_scope_device -+bool __ovld -+atomic_flag_test_and_set_explicit(volatile atomic_flag __global *object, -+ memory_order order, memory_scope scope); -+bool __ovld -+atomic_flag_test_and_set_explicit(volatile atomic_flag __local *object, -+ memory_order order, memory_scope scope); -+void __ovld atomic_flag_clear_explicit(volatile atomic_flag __global *object, -+ memory_order order, memory_scope scope); -+void __ovld atomic_flag_clear_explicit(volatile atomic_flag __local *object, -+ memory_order order, memory_scope scope); -+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) - - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - -@@ -13918,7 +15943,7 @@ float16 __ovld __cnfn shuffle(float4 x, uint16 mask); - float16 __ovld __cnfn shuffle(float8 x, uint16 mask); - float16 __ovld __cnfn shuffle(float16 x, uint16 mask); - --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double2 __ovld __cnfn shuffle(double2 x, ulong2 mask); - double2 __ovld __cnfn shuffle(double4 x, ulong2 mask); - double2 __ovld __cnfn shuffle(double8 x, ulong2 mask); -@@ -13938,7 +15963,7 @@ double16 __ovld __cnfn shuffle(double2 x, ulong16 mask); - double16 __ovld __cnfn shuffle(double4 x, ulong16 mask); - double16 __ovld __cnfn shuffle(double8 x, ulong16 mask); - double16 __ovld __cnfn shuffle(double16 x, ulong16 mask); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - #ifdef cl_khr_fp16 - half2 __ovld __cnfn shuffle(half2 x, ushort2 mask); -@@ -14142,7 +16167,7 @@ float16 __ovld __cnfn shuffle2(float4 x, float4 y, uint16 mask); - float16 __ovld __cnfn shuffle2(float8 x, float8 y, uint16 mask); - float16 __ovld __cnfn shuffle2(float16 x, float16 y, uint16 mask); - --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double2 __ovld __cnfn shuffle2(double2 x, double2 y, ulong2 mask); - double2 __ovld __cnfn shuffle2(double4 x, double4 y, ulong2 mask); - double2 __ovld __cnfn shuffle2(double8 x, double8 y, ulong2 mask); -@@ -14162,7 +16187,7 @@ double16 __ovld __cnfn shuffle2(double2 x, double2 y, ulong16 mask); - double16 __ovld __cnfn shuffle2(double4 x, double4 y, ulong16 mask); - double16 __ovld __cnfn shuffle2(double8 x, double8 y, ulong16 mask); - double16 __ovld __cnfn shuffle2(double16 x, double16 y, ulong16 mask); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - #ifdef cl_khr_fp16 - half2 __ovld __cnfn shuffle2(half2 x, half2 y, ushort2 mask); -@@ -14198,6 +16223,7 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))) - #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable - #endif //cl_khr_gl_msaa_sharing - -+#ifdef __opencl_c_images - /** - * Use the coordinate (coord.xy) to do an element lookup in - * the 2D image object specified by image. -@@ -14476,6 +16502,7 @@ half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord); - - // Image read functions for read_write images - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord); - int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord); - uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord); -@@ -14519,6 +16546,7 @@ float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, i - #endif //cl_khr_gl_msaa_sharing - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - #ifdef cl_khr_mipmap_image - float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod); - int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod); -@@ -14569,6 +16597,7 @@ int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, - uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY); - - #endif //cl_khr_mipmap_image -+#endif //__opencl_c_read_write_images - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - // Image read functions returning half4 type -@@ -14580,6 +16609,7 @@ half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord); - half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord); - half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord); - #endif //cl_khr_fp16 -+#endif //__opencl_c_read_write_images - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - /** -@@ -14669,7 +16699,7 @@ void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, flo - void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color); - void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color); - --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color); - void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color); - void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color); -@@ -14702,7 +16732,7 @@ void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, in - void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float depth); - void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float depth); - --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color); - void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 color); - void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color); -@@ -14715,7 +16745,7 @@ void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 - #ifdef cl_khr_fp16 - void __ovld write_imageh(write_only image1d_t image, int coord, half4 color); - void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 color); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color); - #endif - void __ovld write_imageh(write_only image1d_array_t image, int2 coord, half4 color); -@@ -14725,6 +16755,7 @@ void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 col - - // Image write functions for read_write images - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color); - void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color); - void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color); -@@ -14745,7 +16776,7 @@ void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, flo - void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int4 color); - void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, uint4 color); - --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - void __ovld write_imagef(read_write image3d_t image, int4 coord, float4 color); - void __ovld write_imagei(read_write image3d_t image, int4 coord, int4 color); - void __ovld write_imageui(read_write image3d_t image, int4 coord, uint4 color); -@@ -14777,7 +16808,7 @@ void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, in - void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, int lod, float color); - void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, int lod, float color); - --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - void __ovld write_imagef(read_write image3d_t image, int4 coord, int lod, float4 color); - void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 color); - void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color); -@@ -14790,13 +16821,14 @@ void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 - #ifdef cl_khr_fp16 - void __ovld write_imageh(read_write image1d_t image, int coord, half4 color); - void __ovld write_imageh(read_write image2d_t image, int2 coord, half4 color); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - void __ovld write_imageh(read_write image3d_t image, int4 coord, half4 color); - #endif - void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 color); - void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color); - void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color); - #endif //cl_khr_fp16 -+#endif //__opencl_c_read_write_images - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - // Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have -@@ -14810,7 +16842,7 @@ void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 col - int __ovld __cnfn get_image_width(read_only image1d_t image); - int __ovld __cnfn get_image_width(read_only image1d_buffer_t image); - int __ovld __cnfn get_image_width(read_only image2d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - int __ovld __cnfn get_image_width(read_only image3d_t image); - #endif - int __ovld __cnfn get_image_width(read_only image1d_array_t image); -@@ -14829,7 +16861,7 @@ int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image); - int __ovld __cnfn get_image_width(write_only image1d_t image); - int __ovld __cnfn get_image_width(write_only image1d_buffer_t image); - int __ovld __cnfn get_image_width(write_only image2d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - int __ovld __cnfn get_image_width(write_only image3d_t image); - #endif - int __ovld __cnfn get_image_width(write_only image1d_array_t image); -@@ -14846,6 +16878,7 @@ int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - int __ovld __cnfn get_image_width(read_write image1d_t image); - int __ovld __cnfn get_image_width(read_write image1d_buffer_t image); - int __ovld __cnfn get_image_width(read_write image2d_t image); -@@ -14862,6 +16895,7 @@ int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image); - int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image); - int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing -+#endif //__opencl_c_read_write_images - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - /** -@@ -14882,7 +16916,7 @@ int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing - - int __ovld __cnfn get_image_height(write_only image2d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - int __ovld __cnfn get_image_height(write_only image3d_t image); - #endif - int __ovld __cnfn get_image_height(write_only image2d_array_t image); -@@ -14898,6 +16932,7 @@ int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - int __ovld __cnfn get_image_height(read_write image2d_t image); - int __ovld __cnfn get_image_height(read_write image3d_t image); - int __ovld __cnfn get_image_height(read_write image2d_array_t image); -@@ -14911,6 +16946,7 @@ int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image); - int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image); - int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing -+#endif //__opencl_c_read_write_images - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - /** -@@ -14918,12 +16954,14 @@ int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image); - */ - int __ovld __cnfn get_image_depth(read_only image3d_t image); - --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - int __ovld __cnfn get_image_depth(write_only image3d_t image); - #endif - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - int __ovld __cnfn get_image_depth(read_write image3d_t image); -+#endif //__opencl_c_read_write_images - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - // OpenCL Extension v2.0 s9.18 - Mipmaps -@@ -14939,13 +16977,15 @@ int __ovld get_image_num_mip_levels(read_only image3d_t image); - - int __ovld get_image_num_mip_levels(write_only image1d_t image); - int __ovld get_image_num_mip_levels(write_only image2d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - int __ovld get_image_num_mip_levels(write_only image3d_t image); - #endif - -+#ifdef __opencl_c_read_write_images - int __ovld get_image_num_mip_levels(read_write image1d_t image); - int __ovld get_image_num_mip_levels(read_write image2d_t image); - int __ovld get_image_num_mip_levels(read_write image3d_t image); -+#endif //__opencl_c_read_write_images - - int __ovld get_image_num_mip_levels(read_only image1d_array_t image); - int __ovld get_image_num_mip_levels(read_only image2d_array_t image); -@@ -14957,10 +16997,12 @@ int __ovld get_image_num_mip_levels(write_only image2d_array_t image); - int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image); - int __ovld get_image_num_mip_levels(write_only image2d_depth_t image); - -+#ifdef __opencl_c_read_write_images - int __ovld get_image_num_mip_levels(read_write image1d_array_t image); - int __ovld get_image_num_mip_levels(read_write image2d_array_t image); - int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image); - int __ovld get_image_num_mip_levels(read_write image2d_depth_t image); -+#endif //__opencl_c_read_write_images - - #endif //cl_khr_mipmap_image - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -@@ -15004,7 +17046,7 @@ int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth - int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image); - int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image); - int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image); - #endif - int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image); -@@ -15021,6 +17063,7 @@ int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_dept - #endif //cl_khr_gl_msaa_sharing - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image); - int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image); - int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image); -@@ -15037,6 +17080,7 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t im - int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image); - int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing -+#endif //__opencl_c_read_write_images - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - /** -@@ -15076,7 +17120,7 @@ int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t i - int __ovld __cnfn get_image_channel_order(write_only image1d_t image); - int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image); - int __ovld __cnfn get_image_channel_order(write_only image2d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - int __ovld __cnfn get_image_channel_order(write_only image3d_t image); - #endif - int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image); -@@ -15093,6 +17137,7 @@ int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t - #endif //cl_khr_gl_msaa_sharing - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - int __ovld __cnfn get_image_channel_order(read_write image1d_t image); - int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image); - int __ovld __cnfn get_image_channel_order(read_write image2d_t image); -@@ -15109,6 +17154,7 @@ int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image) - int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image); - int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing -+#endif //__opencl_c_read_write_images - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - /** -@@ -15143,6 +17189,7 @@ int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - int2 __ovld __cnfn get_image_dim(read_write image2d_t image); - int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image); - #ifdef cl_khr_depth_images -@@ -15155,6 +17202,7 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image); - int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image); - int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); - #endif //cl_khr_gl_msaa_sharing -+#endif //__opencl_c_read_write_images - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - /** -@@ -15164,11 +17212,13 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); - * component and the w component is 0. - */ - int4 __ovld __cnfn get_image_dim(read_only image3d_t image); --#ifdef cl_khr_3d_image_writes -+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) - int4 __ovld __cnfn get_image_dim(write_only image3d_t image); - #endif - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - int4 __ovld __cnfn get_image_dim(read_write image3d_t image); -+#endif //__opencl_c_read_write_images - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - /** -@@ -15196,6 +17246,7 @@ size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t - #endif //cl_khr_gl_msaa_sharing - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array); - size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array); - #ifdef cl_khr_depth_images -@@ -15205,6 +17256,7 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image - size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array); - size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array); - #endif //cl_khr_gl_msaa_sharing -+#endif //__opencl_c_read_write_images - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - /** -@@ -15222,16 +17274,21 @@ int __ovld get_image_num_samples(write_only image2d_array_msaa_t image); - int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image); - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_read_write_images - int __ovld get_image_num_samples(read_write image2d_msaa_t image); - int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image); - int __ovld get_image_num_samples(read_write image2d_array_msaa_t image); - int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image); -+#endif //__opencl_c_read_write_images - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - #endif - -+#endif //__opencl_c_images -+ - // OpenCL v2.0 s6.13.15 - Work-group Functions - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_work_group_collective_functions - int __ovld __conv work_group_all(int predicate); - int __ovld __conv work_group_any(int predicate); - -@@ -15255,11 +17312,11 @@ ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z); - float __ovld __conv work_group_broadcast(float a, size_t local_id); - float __ovld __conv work_group_broadcast(float a, size_t x, size_t y); - float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __conv work_group_broadcast(double a, size_t local_id); - double __ovld __conv work_group_broadcast(double a, size_t x, size_t y); - double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - #ifdef cl_khr_fp16 - half __ovld __conv work_group_reduce_add(half x); -@@ -15317,7 +17374,7 @@ float __ovld __conv work_group_scan_exclusive_max(float x); - float __ovld __conv work_group_scan_inclusive_add(float x); - float __ovld __conv work_group_scan_inclusive_min(float x); - float __ovld __conv work_group_scan_inclusive_max(float x); --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __conv work_group_reduce_add(double x); - double __ovld __conv work_group_reduce_min(double x); - double __ovld __conv work_group_reduce_max(double x); -@@ -15327,19 +17384,18 @@ double __ovld __conv work_group_scan_exclusive_max(double x); - double __ovld __conv work_group_scan_inclusive_add(double x); - double __ovld __conv work_group_scan_inclusive_min(double x); - double __ovld __conv work_group_scan_inclusive_max(double x); --#endif //cl_khr_fp64 -- -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) -+#endif //__opencl_c_work_group_collective_functions - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - // OpenCL v2.0 s6.13.16 - Pipe Functions --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_pipes - bool __ovld is_valid_reserve_id(reserve_id_t reserve_id); --#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -- -+#endif //__opencl_c_pipes - - // OpenCL v2.0 s6.13.17 - Enqueue Kernels - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -- -+#ifdef __opencl_c_device_enqueue - ndrange_t __ovld ndrange_1D(size_t); - ndrange_t __ovld ndrange_1D(size_t, size_t); - ndrange_t __ovld ndrange_1D(size_t, size_t, size_t); -@@ -15367,11 +17423,13 @@ bool __ovld is_valid_event (clk_event_t event); - void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value); - - queue_t __ovld get_default_queue(void); -+#endif //__opencl_c_device_enqueue - #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - // OpenCL Extension v2.0 s9.17 - Sub-groups - --#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) -+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \ -+ defined(__opencl_c_subgroups) - // Shared Sub Group Functions - uint __ovld get_sub_group_size(void); - uint __ovld get_max_sub_group_size(void); -@@ -15457,7 +17515,7 @@ half __ovld __conv sub_group_scan_inclusive_min(half x); - half __ovld __conv sub_group_scan_inclusive_max(half x); - #endif //cl_khr_fp16 - --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id); - double __ovld __conv sub_group_reduce_add(double x); - double __ovld __conv sub_group_reduce_min(double x); -@@ -15468,7 +17526,7 @@ double __ovld __conv sub_group_scan_exclusive_max(double x); - double __ovld __conv sub_group_scan_inclusive_add(double x); - double __ovld __conv sub_group_scan_inclusive_min(double x); - double __ovld __conv sub_group_scan_inclusive_max(double x); --#endif //cl_khr_fp64 -+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) - - #endif //cl_khr_subgroups cl_intel_subgroups - -@@ -15570,16 +17628,22 @@ uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c ); - long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c ); - ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c ); - -+#ifdef __opencl_c_images - uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord ); - uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord ); - uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord ); - uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord ); -+#endif //__opencl_c_images - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_images -+#ifdef __opencl_c_read_write_images - uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord); - uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord); - uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord); - uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord); -+#endif //__opencl_c_read_write_images -+#endif //__opencl_c_images - #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - uint __ovld __conv intel_sub_group_block_read( const __global uint* p ); -@@ -15587,16 +17651,22 @@ uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p ); - uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p ); - uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p ); - -+#ifdef __opencl_c_images - void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data); - void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data); - void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data); - void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data); -+#endif //__opencl_c_images - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_images -+#ifdef __opencl_c_read_write_images - void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data); - void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data); - void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data); - void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data); -+#endif // __opencl_c_read_write_images -+#endif // __opencl_c_images - #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data ); -@@ -15611,7 +17681,7 @@ half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c ); - half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c ); - #endif - --#if defined(cl_khr_fp64) -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - double __ovld __conv intel_sub_group_shuffle( double x, uint c ); - double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c ); - double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c ); -@@ -15710,16 +17780,22 @@ ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x ); - short __ovld __conv intel_sub_group_scan_inclusive_max( short x ); - ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x ); - -+#ifdef __opencl_c_images - uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord ); - uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord ); - uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord ); - uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord ); -+#endif //__opencl_c_images - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_images -+#ifdef __opencl_c_read_write_images - uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord ); - uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord ); - uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord ); - uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord ); -+#endif //__opencl_c_read_write_images -+#endif //__opencl_c_images - #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p ); -@@ -15727,16 +17803,22 @@ uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p - uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p ); - uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p ); - -+#ifdef __opencl_c_images - void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data ); - void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data ); - void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data ); - void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data ); -+#endif //__opencl_c_images - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_images -+#ifdef __opencl_c_read_write_images - void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data ); - void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data ); - void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data ); - void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data ); -+#endif //__opencl_c_read_write_images -+#endif //__opencl_c_images - #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data ); -@@ -15744,16 +17826,22 @@ void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint - void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data ); - void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data ); - -+#ifdef __opencl_c_images - ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord ); - ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord ); - ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord ); - ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord ); -+#endif //__opencl_c_images - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_images -+#ifdef __opencl_c_read_write_images - ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord); - ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord); - ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord); - ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord); -+#endif //__opencl_c_read_write_images -+#endif //__opencl_c_images - #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p ); -@@ -15761,16 +17849,22 @@ ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* - ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p ); - ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p ); - -+#ifdef __opencl_c_images - void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data); - void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data); - void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data); - void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data); -+#endif //__opencl_c_images - - #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#ifdef __opencl_c_images -+#ifdef __opencl_c_read_write_images - void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data); - void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data); - void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data); - void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data); -+#endif //__opencl_c_read_write_images -+#endif //__opencl_c_images - #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) - - void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data ); -@@ -15889,6 +17983,7 @@ short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset( - short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size, - ushort2 image_size); - -+#ifdef __opencl_c_images - intel_sub_group_avc_ime_result_t __ovld - intel_sub_group_avc_ime_evaluate_with_single_reference( - read_only image2d_t src_image, read_only image2d_t ref_image, -@@ -15929,6 +18024,7 @@ intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout( - read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, - intel_sub_group_avc_ime_payload_t payload, - intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components); -+#endif //__opencl_c_images - - intel_sub_group_avc_ime_single_reference_streamin_t __ovld - intel_sub_group_avc_ime_get_single_reference_streamin( -@@ -15993,6 +18089,7 @@ intel_sub_group_avc_ref_payload_t __ovld - intel_sub_group_avc_ref_set_bilinear_filter_enable( - intel_sub_group_avc_ref_payload_t payload); - -+#ifdef __opencl_c_images - intel_sub_group_avc_ref_result_t __ovld - intel_sub_group_avc_ref_evaluate_with_single_reference( - read_only image2d_t src_image, read_only image2d_t ref_image, -@@ -16011,6 +18108,7 @@ intel_sub_group_avc_ref_evaluate_with_multi_reference( - read_only image2d_t src_image, uint packed_reference_ids, - uchar packed_reference_field_polarities, sampler_t vme_media_sampler, - intel_sub_group_avc_ref_payload_t payload); -+#endif //__opencl_c_images - - // SIC built-in functions - intel_sub_group_avc_sic_payload_t __ovld -@@ -16061,6 +18159,7 @@ intel_sub_group_avc_sic_set_block_based_raw_skip_sad( - uchar block_based_skip_type, - intel_sub_group_avc_sic_payload_t payload); - -+#ifdef __opencl_c_images - intel_sub_group_avc_sic_result_t __ovld - intel_sub_group_avc_sic_evaluate_ipe( - read_only image2d_t src_image, sampler_t vme_media_sampler, -@@ -16083,6 +18182,7 @@ intel_sub_group_avc_sic_evaluate_with_multi_reference( - read_only image2d_t src_image, uint packed_reference_ids, - uchar packed_reference_field_polarities, sampler_t vme_media_sampler, - intel_sub_group_avc_sic_payload_t payload); -+#endif //__opencl_c_images - - uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape( - intel_sub_group_avc_sic_result_t result); -diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp -index 6353e14bc41a..a8c216de6e04 100644 ---- a/clang/lib/Parse/ParseDecl.cpp -+++ b/clang/lib/Parse/ParseDecl.cpp -@@ -3904,8 +3904,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, - isInvalid = DS.SetTypeAltiVecBool(true, Loc, PrevSpec, DiagID, Policy); - break; - case tok::kw_pipe: -- if (!getLangOpts().OpenCL || (getLangOpts().OpenCLVersion < 200 && -- !getLangOpts().OpenCLCPlusPlus)) { -+ if (!getLangOpts().OpenCLPipeKeyword) { - // OpenCL 2.0 defined this keyword. OpenCL 1.2 and earlier should - // support the "pipe" word as identifier. - Tok.getIdentifierInfo()->revertTokenIDToIdentifier(); -@@ -4027,8 +4026,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, - case tok::kw___generic: - // generic address space is introduced only in OpenCL v2.0 - // see OpenCL C Spec v2.0 s6.5.5 -- if (Actions.getLangOpts().OpenCLVersion < 200 && -- !Actions.getLangOpts().OpenCLCPlusPlus) { -+ if (!Actions.getLangOpts().OpenCLGenericKeyword) { - DiagID = diag::err_opencl_unknown_type_specifier; - PrevSpec = Tok.getIdentifierInfo()->getNameStart(); - isInvalid = true; -@@ -5050,8 +5048,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { - default: return false; - - case tok::kw_pipe: -- return (getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200) || -- getLangOpts().OpenCLCPlusPlus; -+ return getLangOpts().OpenCLPipeKeyword; - - case tok::identifier: // foo::bar - // Unfortunate hack to support "Class.factoryMethod" notation. -@@ -5557,8 +5554,7 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang, - if (Kind == tok::star || Kind == tok::caret) - return true; - -- if (Kind == tok::kw_pipe && -- ((Lang.OpenCL && Lang.OpenCLVersion >= 200) || Lang.OpenCLCPlusPlus)) -+ if (Kind == tok::kw_pipe && Lang.OpenCLPipeKeyword) - return true; - - if (!Lang.CPlusPlus) -diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp -index df411e1928d6..9a40ce888695 100644 ---- a/clang/lib/Parse/ParsePragma.cpp -+++ b/clang/lib/Parse/ParsePragma.cpp -@@ -697,12 +697,14 @@ void Parser::HandlePragmaOpenCLExtension() { - if (Name == "all") { - if (State == Disable) { - Opt.disableAll(); -- Opt.enableSupportedCore(getLangOpts()); -+ Opt.enableSupportedCore(); - } else { - PP.Diag(NameLoc, diag::warn_pragma_expected_predicate) << 1; - } -+ } else if (Opt.isFeature(Name)) { -+ PP.Diag(NameLoc, diag::warn_opencl_pragma_feature_ignore) << Ident; - } else if (State == Begin) { -- if (!Opt.isKnown(Name) || !Opt.isSupported(Name, getLangOpts())) { -+ if (!Opt.isKnown(Name) || !Opt.isSupported(Name)) { - Opt.support(Name); - } - Actions.setCurrentOpenCLExtension(Name); -@@ -712,9 +714,9 @@ void Parser::HandlePragmaOpenCLExtension() { - Actions.setCurrentOpenCLExtension(""); - } else if (!Opt.isKnown(Name)) - PP.Diag(NameLoc, diag::warn_pragma_unknown_extension) << Ident; -- else if (Opt.isSupportedExtension(Name, getLangOpts())) -+ else if (Opt.isSupportedExtension(Name)) - Opt.enable(Name, State == Enable); -- else if (Opt.isSupportedCore(Name, getLangOpts())) -+ else if (Opt.isSupportedCore(Name)) - PP.Diag(NameLoc, diag::warn_pragma_extension_is_core) << Ident; - else - PP.Diag(NameLoc, diag::warn_pragma_unsupported_extension) << Ident; -diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td -index 9d6bb411eff8..d352d35f1e46 100644 ---- a/clang/lib/Sema/OpenCLBuiltins.td -+++ b/clang/lib/Sema/OpenCLBuiltins.td -@@ -22,11 +22,13 @@ - class Version { - int ID = _Version; - } -+ - def CLAll : Version< 0>; - def CL10 : Version<100>; - def CL11 : Version<110>; - def CL12 : Version<120>; - def CL20 : Version<200>; -+def CL30 : Version<300>; - - // Address spaces - // Pointer types need to be assigned an address space. -@@ -65,6 +67,14 @@ def FuncExtKhrGlMsaaSharing : FunctionExtension<"cl_khr_gl_msaa_sha - // Multiple extensions - def FuncExtKhrMipmapAndWrite3d : FunctionExtension<"cl_khr_mipmap_image cl_khr_3d_image_writes">; - -+// Features -+def FuncExtGenericAddressSpace : FunctionExtension<"__opencl_c_generic_address_space">; -+def FuncExtWorkGroupCollective : FunctionExtension<"__opencl_c_work_group_collective_functions">; -+def FuncExtPipes : FunctionExtension<"__opencl_c_pipes">; -+def FuncExtDeviceSidEenqueue : FunctionExtension<"__opencl_c_device_enqueue">; -+def FuncNonExplicitAtomicFeatures : FunctionExtension<"__opencl_c_atomic_order_seq_cst __opencl_c_atomic_scope_device">; -+def FuncNonExplicitAtomicFeaturesAndGenericAS : FunctionExtension<"__opencl_c_atomic_order_seq_cst __opencl_c_atomic_scope_device __opencl_c_generic_address_space">; -+ - // Qualified Type. These map to ASTContext::QualType. - class QualType { - // Name of the field or function in a clang::ASTContext -@@ -230,13 +240,9 @@ class Builtin _Signature, list _Attributes = Attr. - bit IsConv = _Attributes[2]; - // OpenCL extensions to which the function belongs. - FunctionExtension Extension = FuncExtNone; -- // Version of OpenCL from which the function is available (e.g.: CL10). -- // MinVersion is inclusive. -- Version MinVersion = CL10; -- // Version of OpenCL from which the function is not supported anymore. -- // MaxVersion is exclusive. -+ // List of OpenCL version in which this function available. - // CLAll makes the function available for all versions. -- Version MaxVersion = CLAll; -+ list Versions = [CLAll]; - } - - //===----------------------------------------------------------------------===// -@@ -398,7 +404,7 @@ foreach name = ["get_global_size", "get_global_id", "get_local_size", - def : Builtin; - } - --let MinVersion = CL20 in { -+let Versions = [CL20, CL30] in { - def : Builtin<"get_enqueued_local_size", [Size, UInt]>; - foreach name = ["get_global_linear_id", "get_local_linear_id"] in { - def : Builtin; -@@ -471,7 +477,7 @@ foreach name = ["fma", "mad"] in { - } - - // --- Version dependent --- --let MaxVersion = CL20 in { -+let Versions = [CL10, CL11, CL12, CL30] in { - foreach AS = [GlobalAS, LocalAS, PrivateAS] in { - foreach name = ["fract", "modf", "sincos"] in { - def : Builtin]>; -@@ -488,7 +494,9 @@ let MaxVersion = CL20 in { - } - } - } --let MinVersion = CL20 in { -+ -+let Versions = [CL20, CL30] in { -+ let Extension = FuncExtGenericAddressSpace in { - foreach name = ["fract", "modf", "sincos"] in { - def : Builtin]>; - } -@@ -501,6 +509,7 @@ let MinVersion = CL20 in { - def : Builtin]>; - } - } -+ } - } - - // --- Table 9 --- -@@ -531,7 +540,7 @@ foreach name = ["abs"] in { - foreach name = ["clz", "popcount"] in { - def : Builtin; - } --let MinVersion = CL20 in { -+let Versions = [CL20, CL30] in { - foreach name = ["ctz"] in { - def : Builtin; - } -@@ -705,7 +714,7 @@ foreach name = ["select"] in { - // --- Table 15 --- - // Variants for OpenCL versions below 2.0, using pointers to the global, local - // and private address spaces. --let MaxVersion = CL20 in { -+let Versions = [CL10, CL11, CL12, CL30] in { - foreach AS = [GlobalAS, LocalAS, PrivateAS] in { - foreach VSize = [2, 3, 4, 8, 16] in { - foreach name = ["vload" # VSize] in { -@@ -748,7 +757,8 @@ let MaxVersion = CL20 in { - } - // Variants for OpenCL versions above 2.0, using pointers to the generic - // address space. --let MinVersion = CL20 in { -+let Versions = [CL20, CL30] in { -+ let Extension = FuncExtGenericAddressSpace in { - foreach VSize = [2, 3, 4, 8, 16] in { - foreach name = ["vload" # VSize] in { - def : Builtin, Size, PointerType, GenericAS>]>; -@@ -786,6 +796,7 @@ let MinVersion = CL20 in { - } - } - } -+ } - } - // Variants using pointers to the constant address space. - foreach VSize = [2, 3, 4, 8, 16] in { -@@ -812,7 +823,7 @@ foreach VSize = [2, 3, 4, 8, 16] in { - } - } - } --let MaxVersion = CL20 in { -+let Versions = [CL10, CL11, CL12, CL30] in { - foreach AS = [GlobalAS, LocalAS, PrivateAS] in { - def : Builtin<"vload_half", [Float, Size, PointerType, AS>]>; - foreach VSize = [2, 3, 4, 8, 16] in { -@@ -832,7 +843,8 @@ let MaxVersion = CL20 in { - } - } - } --let MinVersion = CL20 in { -+let Versions = [CL20, CL30] in { -+ let Extension = FuncExtGenericAddressSpace in { - foreach AS = [GenericAS] in { - def : Builtin<"vload_half", [Float, Size, PointerType, AS>]>; - foreach VSize = [2, 3, 4, 8, 16] in { -@@ -851,6 +863,7 @@ let MinVersion = CL20 in { - } - } - } -+ } - } - - foreach AS = [ConstantAS] in { -@@ -875,7 +888,9 @@ foreach name = ["async_work_group_strided_copy"] in { - def : Builtin, PointerType, LocalAS>, Size, Size, Event]>; - } - foreach name = ["wait_group_events"] in { -+ let Extension = FuncExtGenericAddressSpace in { - def : Builtin]>; -+ } - } - foreach name = ["prefetch"] in { - def : Builtin, GlobalAS>, Size]>; -@@ -1154,7 +1169,8 @@ foreach aQual = ["WO", "RW"] in { - //-------------------------------------------------------------------- - // OpenCL v2.0 s6.13.15 - Work-group Functions - // --- Table 26 --- --let MinVersion = CL20 in { -+let Versions = [CL20, CL30] in { -+ let Extension = FuncExtWorkGroupCollective in { - foreach name = ["work_group_all", "work_group_any"] in { - def : Builtin; - } -@@ -1169,11 +1185,12 @@ let MinVersion = CL20 in { - def : Builtin; - } - } -+ } - } - - - // OpenCL v2.0 s9.17.3: Additions to section 6.13.1: Work-Item Functions --let MinVersion = CL20 in { -+let Versions = [CL20] in { - let Extension = FuncExtKhrSubgroups in { - def get_sub_group_size : Builtin<"get_sub_group_size", [UInt]>; - def get_max_sub_group_size : Builtin<"get_max_sub_group_size", [UInt]>; -diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp -index 9cfce5a63b1d..c26f45f62668 100644 ---- a/clang/lib/Sema/Sema.cpp -+++ b/clang/lib/Sema/Sema.cpp -@@ -286,9 +286,10 @@ void Sema::Initialize() { - // Initialize predefined OpenCL types and supported extensions and (optional) - // core features. - if (getLangOpts().OpenCL) { -+ getOpenCLOptions().setOpenCLVersion(getLangOpts()); - getOpenCLOptions().addSupport( - Context.getTargetInfo().getSupportedOpenCLOpts()); -- getOpenCLOptions().enableSupportedCore(getLangOpts()); -+ getOpenCLOptions().enableSupportedCore(); - addImplicitTypedef("sampler_t", Context.OCLSamplerTy); - addImplicitTypedef("event_t", Context.OCLEventTy); - if (getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) { -@@ -344,12 +345,18 @@ void Sema::Initialize() { - "cl_khr_int64_base_atomics cl_khr_int64_extended_atomics"); - - setOpenCLExtensionForType(AtomicDoubleT, "cl_khr_fp64"); -+ setOpenCLExtensionForType(Context.OCLReserveIDTy, "__opencl_c_pipes"); -+ setOpenCLExtensionForType(Context.OCLClkEventTy, -+ "__opencl_c_device_enqueue"); -+ setOpenCLExtensionForType(Context.OCLQueueTy, -+ "__opencl_c_device_enqueue"); - } - - setOpenCLExtensionForType(Context.DoubleTy, "cl_khr_fp64"); - --#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \ -- setOpenCLExtensionForType(Context.Id, Ext); -+#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \ -+ setOpenCLExtensionForType(Context.Id, Ext); \ -+ setOpenCLExtensionForType(Context.Id, "__opencl_c_images"); - #include "clang/Basic/OpenCLImageTypes.def" - #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ - addImplicitTypedef(#ExtType, Context.Id##Ty); \ -@@ -2293,6 +2300,27 @@ bool Sema::isOpenCLDisabledDecl(Decl *FD) { - return false; - } - -+template -+void Sema::DiagnoseOpenCLRequiresOption(llvm::StringRef OpenCLOptName, -+ DiagLocT DiagLoc, DiagInfoT DiagInfo, -+ unsigned Selector, -+ SourceRange SrcRange) { -+ const auto &LO = getLangOpts(); -+ auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; -+ // For versions higher that 3.0 diagnosing feature -+ if (CLVer >= 300) { -+ OpenCLOptName = -+ llvm::StringSwitch(OpenCLOptName) -+ .Case("cl_khr_3d_image_writes", "__opencl_c_3d_image_writes") -+ .Case("cl_khr_subgroups", "__opencl_c_subgroups") -+ .Case("cl_khr_fp64", "__opencl_c_fp64") -+ .Default(OpenCLOptName); -+ } -+ -+ Diag(DiagLoc, diag::err_opencl_requires_extension) -+ << Selector << DiagInfo << OpenCLOptName << SrcRange; -+} -+ - template - bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc, - DiagInfoT DiagInfo, MapT &Map, -@@ -2304,8 +2332,7 @@ bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc, - bool Disabled = false; - for (auto &I : Loc->second) { - if (I != CurrOpenCLExtension && !getOpenCLOptions().isEnabled(I)) { -- Diag(DiagLoc, diag::err_opencl_requires_extension) << Selector << DiagInfo -- << I << SrcRange; -+ DiagnoseOpenCLRequiresOption(I, DiagLoc, DiagInfo, Selector, SrcRange); - Disabled = true; - } - } -@@ -2341,3 +2368,13 @@ bool Sema::checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E) { - return checkOpenCLDisabledTypeOrDecl(&D, E.getBeginLoc(), FnName, - OpenCLDeclExtMap, 1, D.getSourceRange()); - } -+ -+bool Sema::checkOpenCLSubgroupExtForCallExpr(CallExpr *Call) { -+ if (!getOpenCLOptions().isEnabled("cl_khr_subgroups")) { -+ DiagnoseOpenCLRequiresOption("cl_khr_subgroups", Call->getBeginLoc(), -+ Call->getDirectCallee(), 1, -+ Call->getSourceRange()); -+ return true; -+ } -+ return false; -+} -diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp -index 74742023d1b3..efa3b6ab0eb6 100644 ---- a/clang/lib/Sema/SemaChecking.cpp -+++ b/clang/lib/Sema/SemaChecking.cpp -@@ -597,20 +597,11 @@ static bool checkOpenCLBlockArgs(Sema &S, Expr *BlockArg) { - return IllegalParams; - } - --static bool checkOpenCLSubgroupExt(Sema &S, CallExpr *Call) { -- if (!S.getOpenCLOptions().isEnabled("cl_khr_subgroups")) { -- S.Diag(Call->getBeginLoc(), diag::err_opencl_requires_extension) -- << 1 << Call->getDirectCallee() << "cl_khr_subgroups"; -- return true; -- } -- return false; --} -- - static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) { - if (checkArgCount(S, TheCall, 2)) - return true; - -- if (checkOpenCLSubgroupExt(S, TheCall)) -+ if (S.checkOpenCLSubgroupExtForCallExpr(TheCall)) - return true; - - // First argument is an ndrange_t type. -@@ -1564,7 +1555,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, - break; - case Builtin::BIsub_group_reserve_read_pipe: - case Builtin::BIsub_group_reserve_write_pipe: -- if (checkOpenCLSubgroupExt(*this, TheCall) || -+ if (checkOpenCLSubgroupExtForCallExpr(TheCall) || - SemaBuiltinReserveRWPipe(*this, TheCall)) - return ExprError(); - break; -@@ -1577,7 +1568,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, - break; - case Builtin::BIsub_group_commit_read_pipe: - case Builtin::BIsub_group_commit_write_pipe: -- if (checkOpenCLSubgroupExt(*this, TheCall) || -+ if (checkOpenCLSubgroupExtForCallExpr(TheCall) || - SemaBuiltinCommitRWPipe(*this, TheCall)) - return ExprError(); - break; -@@ -4314,6 +4305,20 @@ DiagnoseCStringFormatDirectiveInCFAPI(Sema &S, - } - } - -+bool Sema::isSupportedOpenCLOMemoryOrdering(int64_t Ordering) const { -+ assert(llvm::isValidAtomicOrderingCABI(Ordering)); -+ auto OrderingCABI = (llvm::AtomicOrderingCABI)Ordering; -+ switch (OrderingCABI) { -+ case llvm::AtomicOrderingCABI::acquire: -+ case llvm::AtomicOrderingCABI::release: -+ case llvm::AtomicOrderingCABI::acq_rel: -+ return OpenCLFeatures.isEnabled("__opencl_c_atomic_order_acq_rel"); -+ -+ default: -+ return true; -+ } -+} -+ - /// Determine whether the given type has a non-null nullability annotation. - static bool isNonNullType(ASTContext &ctx, QualType type) { - if (auto nullability = type->getNullability(ctx)) -@@ -5067,10 +5072,17 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, - if (SubExprs.size() >= 2 && Form != Init) { - llvm::APSInt Result(32); - if (SubExprs[1]->isIntegerConstantExpr(Result, Context) && -- !isValidOrderingForOp(Result.getSExtValue(), Op)) -+ !isValidOrderingForOp(Result.getSExtValue(), Op)) { - Diag(SubExprs[1]->getBeginLoc(), - diag::warn_atomic_op_has_invalid_memory_order) - << SubExprs[1]->getSourceRange(); -+ } else if (IsOpenCL && -+ !isSupportedOpenCLOMemoryOrdering(Result.getSExtValue())) { -+ Diag(SubExprs[1]->getBeginLoc(), -+ diag::err_opencl_memory_ordering_require_feat) -+ << SubExprs[1]->getSourceRange(); -+ return ExprError(); -+ } - } - - if (auto ScopeModel = AtomicExpr::getScopeModel(Op)) { -diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp -index 64146f4a912f..c1e629b5dc76 100644 ---- a/clang/lib/Sema/SemaDecl.cpp -+++ b/clang/lib/Sema/SemaDecl.cpp -@@ -6152,7 +6152,9 @@ void Sema::deduceOpenCLAddressSpace(ValueDecl *Decl) { - if (Type->isSamplerT() || Type->isVoidType()) - return; - LangAS ImplAS = LangAS::opencl_private; -- if ((getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) && -+ if ((getLangOpts().OpenCLCPlusPlus || -+ getOpenCLOptions().isEnabled( -+ "__opencl_c_program_scope_global_variables")) && - Var->hasGlobalStorage()) - ImplAS = LangAS::opencl_global; - // If the original type from a decayed type is an array type and that array -@@ -7682,6 +7684,10 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { - // OpenCL C v2.0 s6.5.1 - Variables defined at program scope and static - // variables inside a function can also be declared in the global - // address space. -+ // OpenCL C v3.0 s5.5 - For OpenCL C 2.0, or with the -+ // __opencl_c_program_scope_global_variables feature macro, the -+ // address space for a variable at program scope or a static variable -+ // also be __global - // C++ for OpenCL inherits rule from OpenCL C v2.0. - // FIXME: Adding local AS in C++ for OpenCL might make sense. - if (NewVD->isFileVarDecl() || NewVD->isStaticLocal() || -@@ -7689,10 +7695,11 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { - if (!T->isSamplerT() && - !(T.getAddressSpace() == LangAS::opencl_constant || - (T.getAddressSpace() == LangAS::opencl_global && -- (getLangOpts().OpenCLVersion == 200 || -- getLangOpts().OpenCLCPlusPlus)))) { -+ (OpenCLFeatures.isEnabled( -+ "__opencl_c_program_scope_global_variables"))))) { - int Scope = NewVD->isStaticLocal() | NewVD->hasExternalStorage() << 1; -- if (getLangOpts().OpenCLVersion == 200 || getLangOpts().OpenCLCPlusPlus) -+ if (OpenCLFeatures.isEnabled( -+ "__opencl_c_program_scope_global_variables")) - Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space) - << Scope << "global or constant"; - else -diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp -index 849bc09063b3..c963b95b131a 100644 ---- a/clang/lib/Sema/SemaDeclAttr.cpp -+++ b/clang/lib/Sema/SemaDeclAttr.cpp -@@ -6362,7 +6362,7 @@ static void handleInternalLinkageAttr(Sema &S, Decl *D, const ParsedAttr &AL) { - } - - static void handleOpenCLNoSVMAttr(Sema &S, Decl *D, const ParsedAttr &AL) { -- if (S.LangOpts.OpenCLVersion != 200) -+ if (S.LangOpts.OpenCLVersion < 200) - S.Diag(AL.getLoc(), diag::err_attribute_requires_opencl_version) - << AL << "2.0" << 0; - else -@@ -6446,6 +6446,13 @@ static void handleOpenCLAccessAttr(Sema &S, Decl *D, const ParsedAttr &AL) { - << AL << PDecl->getType() << DeclTy->isImageType(); - D->setInvalidDecl(true); - return; -+ } else if ((!S.getLangOpts().OpenCLCPlusPlus && -+ S.getLangOpts().OpenCLVersion >= 200) && -+ !S.getOpenCLOptions().isEnabled( -+ "__opencl_c_read_write_images")) { -+ S.Diag(AL.getLoc(), diag::err_opencl_requires_extension) -+ << 0 << PDecl->getType() << "__opencl_c_read_write_images"; -+ return; - } - } - } -diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp -index 831e55046e80..4481a59b4517 100644 ---- a/clang/lib/Sema/SemaDeclCXX.cpp -+++ b/clang/lib/Sema/SemaDeclCXX.cpp -@@ -14906,6 +14906,11 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl, - if (auto *PtrTy = ResultType->getAs()) { - ResultType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy); - } -+ if (CanQual ExpectedPtrTy = -+ ExpectedResultType->getAs()) { -+ ExpectedResultType = SemaRef.Context.getCanonicalType( -+ RemoveAddressSpaceFromPtr(SemaRef, ExpectedPtrTy->getTypePtr())); -+ } - } - - // Check that the result type is what we expect. -@@ -14939,6 +14944,11 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl, - FnDecl->getParamDecl(0)->getType()->getAs()) { - FirstParamType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy); - } -+ if (CanQual ExpectedPtrTy = -+ ExpectedFirstParamType->getAs()) { -+ ExpectedFirstParamType = SemaRef.Context.getCanonicalType( -+ RemoveAddressSpaceFromPtr(SemaRef, ExpectedPtrTy->getTypePtr())); -+ } - } - if (SemaRef.Context.getCanonicalType(FirstParamType).getUnqualifiedType() != - ExpectedFirstParamType) -diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp -index 8d96404a5c27..8f21203b9508 100644 ---- a/clang/lib/Sema/SemaLookup.cpp -+++ b/clang/lib/Sema/SemaLookup.cpp -@@ -771,19 +771,20 @@ static void InsertOCLBuiltinDeclarationsFromTable(Sema &S, LookupResult &LR, - // as argument. Only meaningful for generic types, otherwise equals 1. - unsigned GenTypeMaxCnt; - -+ ASTContext &Context = S.Context; -+ -+ // Ignore this BIF if its version does not match the language options. -+ unsigned OpenCLVersion = Context.getLangOpts().OpenCLVersion; -+ if (Context.getLangOpts().OpenCLCPlusPlus) -+ OpenCLVersion = 200; -+ -+ unsigned short VersionCode = OpenCLOptions::EncodeVersion(OpenCLVersion); -+ - for (unsigned SignatureIndex = 0; SignatureIndex < Len; SignatureIndex++) { - const OpenCLBuiltinStruct &OpenCLBuiltin = - BuiltinTable[FctIndex + SignatureIndex]; -- ASTContext &Context = S.Context; - -- // Ignore this BIF if its version does not match the language options. -- unsigned OpenCLVersion = Context.getLangOpts().OpenCLVersion; -- if (Context.getLangOpts().OpenCLCPlusPlus) -- OpenCLVersion = 200; -- if (OpenCLVersion < OpenCLBuiltin.MinVersion) -- continue; -- if ((OpenCLBuiltin.MaxVersion != 0) && -- (OpenCLVersion >= OpenCLBuiltin.MaxVersion)) -+ if (!(OpenCLBuiltin.AllVersions & VersionCode)) - continue; - - SmallVector RetTypes; -diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp -index 93ddd047e09b..93211b801f8d 100644 ---- a/clang/lib/Sema/SemaType.cpp -+++ b/clang/lib/Sema/SemaType.cpp -@@ -2016,7 +2016,7 @@ static QualType deduceOpenCLPointeeAddrSpace(Sema &S, QualType PointeeType) { - !PointeeType.hasAddressSpace()) - PointeeType = S.getASTContext().getAddrSpaceQualType( - PointeeType, -- S.getLangOpts().OpenCLCPlusPlus || S.getLangOpts().OpenCLVersion == 200 -+ S.getOpenCLOptions().isEnabled("__opencl_c_generic_address_space") - ? LangAS::opencl_generic - : LangAS::opencl_private); - return PointeeType; -@@ -5160,9 +5160,15 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, - } - - case DeclaratorChunk::Pipe: { -- T = S.BuildReadPipeType(T, DeclType.Loc); -- processTypeAttrs(state, T, TAL_DeclSpec, -- D.getMutableDeclSpec().getAttributes()); -+ if (S.getOpenCLOptions().isEnabled("__opencl_c_pipes")) { -+ T = S.BuildReadPipeType(T, DeclType.Loc); -+ processTypeAttrs(state, T, TAL_DeclSpec, -+ D.getMutableDeclSpec().getAttributes()); -+ } else { -+ D.setInvalidType(true); -+ T = Context.IntTy; -+ S.Diag(D.getIdentifierLoc(), diag::err_opencl_pipes_require_feat); -+ } - break; - } - } -diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl -index cdbf28bbcad8..0bedff5ef0f3 100644 ---- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl -+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl -@@ -2,7 +2,8 @@ - // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s - // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s - // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s -- -+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s -+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -cl-ext=__opencl_c_program_scope_global_variables -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s - typedef int int2 __attribute__((ext_vector_type(2))); - - typedef struct { -@@ -39,7 +40,7 @@ struct LargeStructTwoMember { - int2 y[20]; - }; - --#if __OPENCL_C_VERSION__ >= 200 -+#ifdef __opencl_c_program_scope_global_variables - struct LargeStructOneMember g_s; - #endif - -@@ -98,7 +99,7 @@ void FuncOneLargeMember(struct LargeStructOneMember u) { - // AMDGCN20: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)* - // AMDGCN20: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false) - // AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]]) --#if __OPENCL_C_VERSION__ >= 200 -+#ifdef __opencl_c_program_scope_global_variables - void test_indirect_arg_globl(void) { - FuncOneLargeMember(g_s); - } -diff --git a/clang/test/CodeGenOpenCL/address-spaces-conversions.cl b/clang/test/CodeGenOpenCL/address-spaces-conversions.cl -index 52feccc540bb..9ecffcca5ee9 100644 ---- a/clang/test/CodeGenOpenCL/address-spaces-conversions.cl -+++ b/clang/test/CodeGenOpenCL/address-spaces-conversions.cl -@@ -1,5 +1,7 @@ - // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s -+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -emit-llvm -o - | FileCheck %s - // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s -+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s - // When -ffake-address-space-map is not used, all addr space mapped to 0 for x86_64. - - // test that we generate address space casts everywhere we need conversions of -diff --git a/clang/test/CodeGenOpenCL/address-spaces-mangling.cl b/clang/test/CodeGenOpenCL/address-spaces-mangling.cl -index 50622f099143..e19ec8451d0d 100644 ---- a/clang/test/CodeGenOpenCL/address-spaces-mangling.cl -+++ b/clang/test/CodeGenOpenCL/address-spaces-mangling.cl -@@ -6,6 +6,7 @@ - // We check that the address spaces are mangled the same in both version of OpenCL - // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s - // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s - - // We can't name this f as private is equivalent to default - // no specifier given address space so we get multiple definition -@@ -47,7 +48,7 @@ void f(constant int *arg) { } - // OCL-20-DAG: @_Z1fPU3AS2i - // OCL-12-DAG: @_Z1fPU3AS2i - --#if __OPENCL_C_VERSION__ >= 200 -+#if __OPENCL_C_VERSION__ == 200 - __attribute__((overloadable)) - void f(generic int *arg) { } - // ASMANG20: @_Z1fPU3AS4i -diff --git a/clang/test/CodeGenOpenCL/address-spaces.cl b/clang/test/CodeGenOpenCL/address-spaces.cl -index 3c8fea2a80bc..26a741338b3a 100644 ---- a/clang/test/CodeGenOpenCL/address-spaces.cl -+++ b/clang/test/CodeGenOpenCL/address-spaces.cl -@@ -1,9 +1,13 @@ - // RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR -+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR - // RUN: %clang_cc1 %s -O0 -DCL20 -cl-std=CL2.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR - // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s -+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s - // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20AMDGCN - // RUN: %clang_cc1 %s -O0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s -+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s - // RUN: %clang_cc1 %s -O0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s -+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s - - // SPIR: %struct.S = type { i32, i32, i32* } - // CL20SPIR: %struct.S = type { i32, i32, i32 addrspace(4)* } -diff --git a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl -index 7216cb517495..8d3b30fe8074 100644 ---- a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl -+++ b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl -@@ -1,4 +1,5 @@ - // RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s -+// RUN: %clang_cc1 -O0 -cl-std=CL3.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s - // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL20 %s - - // CL12-LABEL: define void @func1(i32 addrspace(5)* %x) -diff --git a/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl b/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl -index a5d438933fa4..8073c7756eb6 100644 ---- a/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl -+++ b/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl -@@ -4,6 +4,17 @@ - // RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s - // RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL1.2 %s -emit-llvm -o - | FileCheck %s - // RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s -+// RUN: %clang_cc1 -triple r600 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s -+// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s -+// RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s -+// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s -+// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s -+// RUN: %clang_cc1 -triple amdgcn---opencl -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s -+// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s -+// RUN: %clang_cc1 -triple r600 -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s -+// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s -+// RUN: %clang_cc1 -triple amdgcn---opencl -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s -+// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s - - #ifdef __AMDGCN__ - #define PTSIZE 8 -@@ -11,7 +22,7 @@ - #define PTSIZE 4 - #endif - --#ifdef cl_khr_fp64 -+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) - #pragma OPENCL EXTENSION cl_khr_fp64 : enable - #endif - #ifdef cl_khr_fp16 -@@ -59,8 +70,12 @@ void test() { - check(__alignof__(double) == 8); - #endif - -- check(sizeof(void*) == (__OPENCL_C_VERSION__ >= 200 ? 8 : 4)); -- check(__alignof__(void*) == (__OPENCL_C_VERSION__ >= 200 ? 8 : 4)); -+ check(sizeof(private void*) == 4); -+ check(__alignof__(private void*) == 4); -+#ifdef __opencl_c_generic_address_space -+ check(sizeof(generic void*) == 8); -+ check(__alignof__(generic void*) == 8); -+#endif - check(sizeof(global_ptr_t) == PTSIZE); - check(__alignof__(global_ptr_t) == PTSIZE); - check(sizeof(constant_ptr_t) == PTSIZE); -diff --git a/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl b/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl -index d1ab6aceac5c..70c5bace023b 100644 ---- a/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl -+++ b/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl -@@ -1,4 +1,5 @@ - // RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL1.2 -emit-llvm -o - -O0 | FileCheck %s -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL3.0 -emit-llvm -o - -O0 | FileCheck %s - - #pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable - void test_int8(uchar4 ua, uchar4 ub, char4 sa, char4 sb) { -diff --git a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl -index 76ace5dca21e..5dc43e222f75 100644 ---- a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl -+++ b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl -@@ -1,6 +1,8 @@ - // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM - // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM - // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM -+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM -+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM - - kernel void ker() {}; - // CHECK: define{{.*}}@ker() #0 -diff --git a/clang/test/CodeGenOpenCL/fpmath.cl b/clang/test/CodeGenOpenCL/fpmath.cl -index 0108d909c94e..b28392739c71 100644 ---- a/clang/test/CodeGenOpenCL/fpmath.cl -+++ b/clang/test/CodeGenOpenCL/fpmath.cl -@@ -2,6 +2,8 @@ - // RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s - // RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s - // RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL1.2 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s -+// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL3.0 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s -+// RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s - - typedef __attribute__(( ext_vector_type(4) )) float float4; - -diff --git a/clang/test/CodeGenOpenCL/generic-address-space-feature.cl b/clang/test/CodeGenOpenCL/generic-address-space-feature.cl -new file mode 100644 -index 000000000000..890860abe4d9 ---- /dev/null -+++ b/clang/test/CodeGenOpenCL/generic-address-space-feature.cl -@@ -0,0 +1,28 @@ -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL12 -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL12 -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL20 -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL20 -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL30 -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,__opencl_c_generic_address_space -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL30-GENERIC -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL30 -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,__opencl_c_generic_address_space -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL30-GENERIC -+ -+void test(global float* src1, local float *src2, private float *src3, float *src4, float tmp) { -+ // CL20: %{{.+}} = addrspacecast float addrspace(1)* %{{.+}} to float addrspace(4)* -+ // CL12-NOT: addrspacecast -+ // CL30-NOT: addrspacecast -+ // CL30-GENERIC-NOT: addrspacecast -+ tmp = sincos(tmp, src1); -+ // CL20: %{{.+}} = addrspacecast float addrspace(3)* %{{.+}} to float addrspace(4)* -+ // CL12-NOT: addrspacecast -+ // CL30-NOT: addrspacecast -+ // CL30-GENERIC-NOT: addrspacecast -+ tmp = sincos(tmp, src2); -+ -+ // CL12: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float* {{.+}}) -+ // CL20: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float addrspace(4)* {{.+}}) -+ // CL30: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float* {{.+}}) -+ // CL30-GENERIC: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float addrspace(4)* {{.+}}) -+ // CHECK: addrspacecast -+ tmp = sincos(tmp, src4); -+} -diff --git a/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl b/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl -index 515f13f6e768..5aa31ac6f345 100644 ---- a/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl -+++ b/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl -@@ -1,4 +1,5 @@ - // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s -+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s - - // CHECK: %opencl.intel_sub_group_avc_mce_payload_t = type opaque - // CHECK: %opencl.intel_sub_group_avc_ime_payload_t = type opaque -diff --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl -index e89237623478..3d6708ac361f 100644 ---- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl -+++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl -@@ -1,5 +1,8 @@ - // RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s - // RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s -+// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s -+// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s -+ - // Test that the kernels always use the SPIR calling convention - // to have unambiguous mapping of arguments to feasibly implement - // clSetKernelArg(). -diff --git a/clang/test/CodeGenOpenCL/logical-ops.cl b/clang/test/CodeGenOpenCL/logical-ops.cl -index f083a8580ee7..499eab65039b 100644 ---- a/clang/test/CodeGenOpenCL/logical-ops.cl -+++ b/clang/test/CodeGenOpenCL/logical-ops.cl -@@ -1,4 +1,5 @@ - // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s -+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s - // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=clc++ -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s - - #pragma OPENCL EXTENSION cl_khr_fp64 : enable -diff --git a/clang/test/CodeGenOpenCL/no-half.cl b/clang/test/CodeGenOpenCL/no-half.cl -index aee8f678f01a..46da7fa339e8 100644 ---- a/clang/test/CodeGenOpenCL/no-half.cl -+++ b/clang/test/CodeGenOpenCL/no-half.cl -@@ -1,6 +1,7 @@ - // RUN: %clang_cc1 %s -cl-std=cl2.0 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s - // RUN: %clang_cc1 %s -cl-std=cl1.2 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s - // RUN: %clang_cc1 %s -cl-std=cl1.1 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s -+// RUN: %clang_cc1 %s -cl-std=cl3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s - - #pragma OPENCL EXTENSION cl_khr_fp64:enable - -diff --git a/clang/test/CodeGenOpenCL/pipe_builtin.cl b/clang/test/CodeGenOpenCL/pipe_builtin.cl -index 02b9669b7ab1..0bf35c336199 100644 ---- a/clang/test/CodeGenOpenCL/pipe_builtin.cl -+++ b/clang/test/CodeGenOpenCL/pipe_builtin.cl -@@ -1,4 +1,7 @@ - // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=clc++ -o - %s | FileCheck %s -+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=cl2.0 -o - %s | FileCheck %s -+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=cl3.0 -cl-ext=__opencl_c_pipes,__opencl_c_subgroups -o - %s | FileCheck %s -+ - // FIXME: Add MS ABI manglings of OpenCL things and remove %itanium_abi_triple - // above to support OpenCL in the MS C++ ABI. - -diff --git a/clang/test/CodeGenOpenCL/pipe_types.cl b/clang/test/CodeGenOpenCL/pipe_types.cl -index ba064c6d7557..b7a523d4f084 100644 ---- a/clang/test/CodeGenOpenCL/pipe_types.cl -+++ b/clang/test/CodeGenOpenCL/pipe_types.cl -@@ -1,4 +1,5 @@ - // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s -+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -o - %s | FileCheck %s - - // CHECK: %opencl.pipe_ro_t = type opaque - // CHECK: %opencl.pipe_wo_t = type opaque -diff --git a/clang/test/CodeGenOpenCL/printf.cl b/clang/test/CodeGenOpenCL/printf.cl -index fc139d776db6..0133c5595d81 100644 ---- a/clang/test/CodeGenOpenCL/printf.cl -+++ b/clang/test/CodeGenOpenCL/printf.cl -@@ -1,5 +1,7 @@ - // RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-+cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s - // RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s -+// RUN: %clang_cc1 -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s -+// RUN: %clang_cc1 -cl-std=CL3.0 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s - - typedef __attribute__((ext_vector_type(2))) float float2; - typedef __attribute__((ext_vector_type(2))) half half2; -diff --git a/clang/test/CodeGenOpenCL/unroll-hint.cl b/clang/test/CodeGenOpenCL/unroll-hint.cl -index 0f84450a1ae6..9347c935869b 100644 ---- a/clang/test/CodeGenOpenCL/unroll-hint.cl -+++ b/clang/test/CodeGenOpenCL/unroll-hint.cl -@@ -1,5 +1,6 @@ - // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s - // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s | FileCheck %s -+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -o - %s | FileCheck %s - - /*** for ***/ - void for_count() -diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c -index 5c0bfb69f9a3..eabdf0b5938d 100644 ---- a/clang/test/Driver/autocomplete.c -+++ b/clang/test/Driver/autocomplete.c -@@ -43,6 +43,8 @@ - // CLSTDALL-NEXT: CL1.2 - // CLSTDALL-NEXT: cl2.0 - // CLSTDALL-NEXT: CL2.0 -+// CLSTDALL-NEXT: cl3.0 -+// CLSTDALL-NEXT: CL3.0 - // CLSTDALL-NEXT: clc++ - // CLSTDALL-NEXT: CLC++ - // RUN: %clang --autocomplete=-fno-sanitize-coverage=,f | FileCheck %s -check-prefix=FNOSANICOVER -diff --git a/clang/test/Driver/opencl.cl b/clang/test/Driver/opencl.cl -index 63b04bc1af41..ffdd4f7d65bb 100644 ---- a/clang/test/Driver/opencl.cl -+++ b/clang/test/Driver/opencl.cl -@@ -2,6 +2,7 @@ - // RUN: %clang -S -### -cl-std=CL1.1 %s 2>&1 | FileCheck --check-prefix=CHECK-CL11 %s - // RUN: %clang -S -### -cl-std=CL1.2 %s 2>&1 | FileCheck --check-prefix=CHECK-CL12 %s - // RUN: %clang -S -### -cl-std=CL2.0 %s 2>&1 | FileCheck --check-prefix=CHECK-CL20 %s -+// RUN: %clang -S -### -cl-std=CL3.0 %s 2>&1 | FileCheck --check-prefix=CHECK-CL30 %s - // RUN: %clang -S -### -cl-std=clc++ %s 2>&1 | FileCheck --check-prefix=CHECK-CLCPP %s - // RUN: %clang -S -### -cl-opt-disable %s 2>&1 | FileCheck --check-prefix=CHECK-OPT-DISABLE %s - // RUN: %clang -S -### -cl-strict-aliasing %s 2>&1 | FileCheck --check-prefix=CHECK-STRICT-ALIASING %s -@@ -22,6 +23,7 @@ - // CHECK-CL11: "-cc1" {{.*}} "-cl-std=CL1.1" - // CHECK-CL12: "-cc1" {{.*}} "-cl-std=CL1.2" - // CHECK-CL20: "-cc1" {{.*}} "-cl-std=CL2.0" -+// CHECK-CL30: "-cc1" {{.*}} "-cl-std=CL3.0" - // CHECK-CLCPP: "-cc1" {{.*}} "-cl-std=clc++" - // CHECK-OPT-DISABLE: "-cc1" {{.*}} "-cl-opt-disable" - // CHECK-STRICT-ALIASING: "-cc1" {{.*}} "-cl-strict-aliasing" -diff --git a/clang/test/Driver/unknown-std.cl b/clang/test/Driver/unknown-std.cl -index 6f371bac13ac..00209fb62556 100644 ---- a/clang/test/Driver/unknown-std.cl -+++ b/clang/test/Driver/unknown-std.cl -@@ -10,6 +10,7 @@ - // CHECK-NEXT: note: use 'cl1.1' for 'OpenCL 1.1' standard - // CHECK-NEXT: note: use 'cl1.2' for 'OpenCL 1.2' standard - // CHECK-NEXT: note: use 'cl2.0' for 'OpenCL 2.0' standard -+// CHECK-NEXT: note: use 'cl3.0' for 'OpenCL 3.0' standard - // CHECK-NEXT: note: use 'clc++' for 'C++ for OpenCL' standard - - // Make sure that no other output is present. -diff --git a/clang/test/Frontend/stdlang.c b/clang/test/Frontend/stdlang.c -index 51484999e37a..eac4632fbdd6 100644 ---- a/clang/test/Frontend/stdlang.c -+++ b/clang/test/Frontend/stdlang.c -@@ -9,6 +9,7 @@ - // RUN: %clang_cc1 -x cl -cl-std=CL1.1 -DOPENCL %s - // RUN: %clang_cc1 -x cl -cl-std=CL1.2 -DOPENCL %s - // RUN: %clang_cc1 -x cl -cl-std=CL2.0 -DOPENCL %s -+// RUN: %clang_cc1 -x cl -cl-std=CL3.0 -DOPENCL %s - // RUN: %clang_cc1 -x cl -cl-std=CLC++ -DOPENCL %s - // RUN: not %clang_cc1 -x cl -std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s - // RUN: not %clang_cc1 -x cl -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s -diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl -index 1b151ffdd16a..2716076acdcf 100644 ---- a/clang/test/Headers/opencl-c-header.cl -+++ b/clang/test/Headers/opencl-c-header.cl -@@ -1,6 +1,7 @@ - // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify | FileCheck %s - // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.1 | FileCheck %s - // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.2 | FileCheck %s -+// RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL3.0 | FileCheck %s - // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=clc++ | FileCheck %s --check-prefix=CHECK20 - - // Test including the default header as a module. -@@ -39,9 +40,11 @@ - // RUN: rm -rf %t - // RUN: mkdir -p %t - // RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL1.2 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s -+// RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL3.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s - // RUN: %clang_cc1 -triple amdgcn--amdhsa -O0 -emit-llvm -o - -cl-std=CL2.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK20 --check-prefix=CHECK-MOD %s - // RUN: chmod u-w %t - // RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL1.2 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s -+// RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL3.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s - // RUN: %clang_cc1 -triple amdgcn--amdhsa -O0 -emit-llvm -o - -cl-std=CL2.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK20 --check-prefix=CHECK-MOD %s - // RUN: chmod u+w %t - -@@ -67,7 +70,7 @@ char f(char x) { - // from OpenCL 2.0 onwards. - - // CHECK20: _Z12write_imagef14ocl_image3d_wo --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) - void test_image3dwo(write_only image3d_t img) { - write_imagef(img, (0), (0.0f)); - } -@@ -81,7 +84,7 @@ void test_atomics(__generic volatile unsigned int* a) { - #endif - - // Verify that ATOMIC_VAR_INIT is defined. --#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) - global atomic_int z = ATOMIC_VAR_INIT(99); - #endif //__OPENCL_C_VERSION__ - -diff --git a/clang/test/Index/pipe-size.cl b/clang/test/Index/pipe-size.cl -index 94a1255f0a48..59b76051eda1 100644 ---- a/clang/test/Index/pipe-size.cl -+++ b/clang/test/Index/pipe-size.cl -@@ -2,6 +2,13 @@ - // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR - // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64 - // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN -+// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple x86_64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=X86 -+// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR -+// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64 -+// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN -+ -+ -+ - __kernel void testPipe( pipe int test ) - { - int s = sizeof(test); -diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c -index def105f4c52e..b088a37ba665 100644 ---- a/clang/test/Preprocessor/predefined-macros.c -+++ b/clang/test/Preprocessor/predefined-macros.c -@@ -129,6 +129,8 @@ - // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL12 - // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=CL2.0 \ - // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL20 -+// RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=CL3.0 \ -+// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL30 - // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-fast-relaxed-math \ - // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FRM - // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=clc++ \ -@@ -137,26 +139,37 @@ - // CHECK-CL10: #define CL_VERSION_1_1 110 - // CHECK-CL10: #define CL_VERSION_1_2 120 - // CHECK-CL10: #define CL_VERSION_2_0 200 -+// CHECK-CL10: #define CL_VERSION_3_0 300 - // CHECK-CL10: #define __OPENCL_C_VERSION__ 100 - // CHECK-CL10-NOT: #define __FAST_RELAXED_MATH__ 1 - // CHECK-CL11: #define CL_VERSION_1_0 100 - // CHECK-CL11: #define CL_VERSION_1_1 110 - // CHECK-CL11: #define CL_VERSION_1_2 120 - // CHECK-CL11: #define CL_VERSION_2_0 200 -+// CHECK-CL11: #define CL_VERSION_3_0 300 - // CHECK-CL11: #define __OPENCL_C_VERSION__ 110 - // CHECK-CL11-NOT: #define __FAST_RELAXED_MATH__ 1 - // CHECK-CL12: #define CL_VERSION_1_0 100 - // CHECK-CL12: #define CL_VERSION_1_1 110 - // CHECK-CL12: #define CL_VERSION_1_2 120 - // CHECK-CL12: #define CL_VERSION_2_0 200 -+// CHECK-CL12: #define CL_VERSION_3_0 300 - // CHECK-CL12: #define __OPENCL_C_VERSION__ 120 - // CHECK-CL12-NOT: #define __FAST_RELAXED_MATH__ 1 - // CHECK-CL20: #define CL_VERSION_1_0 100 - // CHECK-CL20: #define CL_VERSION_1_1 110 - // CHECK-CL20: #define CL_VERSION_1_2 120 - // CHECK-CL20: #define CL_VERSION_2_0 200 -+// CHECK-CL20: #define CL_VERSION_3_0 300 - // CHECK-CL20: #define __OPENCL_C_VERSION__ 200 - // CHECK-CL20-NOT: #define __FAST_RELAXED_MATH__ 1 -+// CHECK-CL30: #define CL_VERSION_1_0 100 -+// CHECK-CL30: #define CL_VERSION_1_1 110 -+// CHECK-CL30: #define CL_VERSION_1_2 120 -+// CHECK-CL30: #define CL_VERSION_2_0 200 -+// CHECK-CL30: #define CL_VERSION_3_0 300 -+// CHECK-CL30: #define __OPENCL_C_VERSION__ 300 -+// CHECK-CL30-NOT: #define __FAST_RELAXED_MATH__ 1 - // CHECK-FRM: #define __FAST_RELAXED_MATH__ 1 - // CHECK-CLCPP10: #define __CL_CPP_VERSION_1_0__ 100 - // CHECK-CLCPP10: #define __OPENCL_CPP_VERSION__ 100 -diff --git a/clang/test/Sema/feature-extensions-simult-support.cl b/clang/test/Sema/feature-extensions-simult-support.cl -new file mode 100644 -index 000000000000..0789105002b2 ---- /dev/null -+++ b/clang/test/Sema/feature-extensions-simult-support.cl -@@ -0,0 +1,75 @@ -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_fp64 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_3d_image_writes -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_3d_image_writes -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_subgroups -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_subgroups -+ -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_fp64 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_3d_image_writes -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_3d_image_writes -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_subgroups -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_subgroups -+ -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_3d_image_writes -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_subgroups -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_device_enqueue -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_read_write_images -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64,-cl_khr_fp64 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_3d_image_writes,-cl_khr_3d_image_writes -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_subgroups,-cl_khr_subgroups -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_fp64 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_3d_image_writes -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_subgroups -+ -+// expected-no-diagnostics -+ -+#ifdef cl_khr_fp64 -+ #ifndef __opencl_c_fp64 -+ #error macros were not properly set up -+ #endif -+#endif -+#ifdef __opencl_c_fp64 -+ #ifndef cl_khr_fp64 -+ #error macros were not properly set up -+ #endif -+#endif -+ -+#ifdef cl_khr_3d_image_writes -+ #ifndef __opencl_c_3d_image_writes -+ #error macros were not properly set up -+ #endif -+#endif -+#ifdef __opencl_c_3d_image_writes -+ #ifndef cl_khr_3d_image_writes -+ #error macros were not properly set up -+ #endif -+#endif -+ -+#ifdef cl_khr_subgroups -+ #ifndef __opencl_c_subgroups -+ #error macros were not properly set up -+ #endif -+#endif -+#ifdef __opencl_c_subgroups -+ #ifndef cl_khr_subgroups -+ #error macros were not properly set up -+ #endif -+#endif -+ -+#if defined(__opencl_c_pipes) || defined(__opencl_c_device_enqueue) -+ #ifndef __opencl_c_generic_address_space -+ #error macros were not properly set up -+ #endif -+#endif -+ -+#if defined(__opencl_c_3d_image_writes) || defined(__opencl_c_read_write_images) -+ #ifndef __opencl_c_images -+ #error macros were not properly set up -+ #endif -+#endif -+ -+kernel void test(){} -diff --git a/clang/test/Sema/features-ignore-pragma.cl b/clang/test/Sema/features-ignore-pragma.cl -new file mode 100644 -index 000000000000..046ce5390754 ---- /dev/null -+++ b/clang/test/Sema/features-ignore-pragma.cl -@@ -0,0 +1,24 @@ -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_subgroups -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_3d_image_writes -+ -+#pragma OPENCL EXTENSION __opencl_c_fp64 : enable -+// expected-warning@-1 {{OpenCL feature support can't be controlled via pragma, ignoring}} -+ -+#pragma OPENCL EXTENSION cl_khr_fp64 : enable -+#ifndef __opencl_c_fp64 -+// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_fp64' - ignoring}} -+#endif -+ -+#pragma OPENCL EXTENSION cl_khr_subgroups : enable -+#ifndef __opencl_c_subgroups -+// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_subgroups' - ignoring}} -+#endif -+ -+#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable -+#ifndef __opencl_c_3d_image_writes -+// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_3d_image_writes' - ignoring}} -+#endif -+ -+kernel void foo() {} -diff --git a/clang/test/Sema/opencl-features-pipes.cl b/clang/test/Sema/opencl-features-pipes.cl -new file mode 100644 -index 000000000000..c0ac778f24a6 ---- /dev/null -+++ b/clang/test/Sema/opencl-features-pipes.cl -@@ -0,0 +1,18 @@ -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.1 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -DHAS -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -DHAS -+// expected-no-diagnostics -+ -+#ifdef HAS -+ #ifndef __opencl_c_pipes -+ #error Feature should be defined -+ #endif -+#else -+ #ifdef __opencl_c_pipes -+ #error Feature should not be defined -+ #endif -+#endif -+ -+kernel void foo() {} -diff --git a/clang/test/Sema/opencl-features.cl b/clang/test/Sema/opencl-features.cl -new file mode 100644 -index 000000000000..aa432f6b60bf ---- /dev/null -+++ b/clang/test/Sema/opencl-features.cl -@@ -0,0 +1,128 @@ -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-__opencl_c_device_enqueue,-__opencl_c_pipes,-__opencl_c_read_write_images -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CLC++ -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -+// expected-no-diagnostics -+ -+#ifndef __opencl_c_int64 -+ #error Feature __opencl_c_int64 shouldn't be defined -+#endif -+ -+#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) -+ #ifndef __opencl_c_3d_image_writes -+ #error Feature __opencl_c_3d_image_writes should be defined -+ #endif -+ -+ #ifndef __opencl_c_atomic_order_acq_rel -+ #error Feature __opencl_c_atomic_order_acq_rel should be defined -+ #endif -+ -+ #ifndef __opencl_c_atomic_order_seq_cst -+ #error Feature __opencl_c_atomic_order_seq_cst should be defined -+ #endif -+ -+ #ifndef __opencl_c_atomic_scope_device -+ #error Feature __opencl_c_atomic_scope_device should be defined -+ #endif -+ -+ #ifndef __opencl_c_atomic_scope_all_devices -+ #error Feature __opencl_c_atomic_scope_all_devices should be defined -+ #endif -+ -+ #ifndef __opencl_c_device_enqueue -+ #error Feature __opencl_c_device_enqueue should be defined -+ #endif -+ -+ #ifndef __opencl_c_generic_address_space -+ #error Feature __opencl_c_generic_address_space should be defined -+ #endif -+ -+ #ifndef __opencl_c_pipes -+ #error Feature __opencl_c_pipes should be defined -+ #endif -+ -+ #ifndef __opencl_c_program_scope_global_variables -+ #error Feature __opencl_c_program_scope_global_variables should be defined -+ #endif -+ -+ #ifndef __opencl_c_read_write_images -+ #error Feature __opencl_c_read_write_images should be defined -+ #endif -+ -+ #ifndef __opencl_c_subgroups -+ #error Feature __opencl_c_subgroups should be defined -+ #endif -+ -+ #ifndef __opencl_c_work_group_collective_functions -+ #error Feature __opencl_c_work_group_collective_functions should be defined -+ #endif -+ -+ #ifndef __opencl_c_fp64 -+ #error Feature __opencl_c_fp64 should be defined -+ #endif -+ -+ #ifndef __opencl_c_images -+ #error Feature __opencl_c_images should be defined -+ #endif -+#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -+ -+ -+#if __OPENCL_C_VERSION__ == CL_VERSION_3_0 -+ #ifdef __opencl_c_3d_image_writes -+ #error Feature __opencl_c_3d_image_writes shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_atomic_order_acq_rel -+ #error Feature __opencl_c_atomic_order_acq_rel shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_atomic_order_seq_cst -+ #error Feature __opencl_c_atomic_order_seq_cst shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_atomic_scope_device -+ #error Feature __opencl_c_atomic_scope_device shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_atomic_scope_all_devices -+ #error Feature __opencl_c_atomic_scope_all_devices shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_device_enqueue -+ #error Feature __opencl_c_device_enqueue shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_generic_address_space -+ #error Feature __opencl_c_generic_address_space shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_pipes -+ #error Feature __opencl_c_pipes shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_program_scope_global_variables -+ #error Feature __opencl_c_program_scope_global_variables shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_read_write_images -+ #error Feature __opencl_c_read_write_images shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_subgroups -+ #error Feature __opencl_c_subgroups shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_work_group_collective_functions -+ #error Feature __opencl_c_work_group_collective_functions shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_fp64 -+ #error Feature __opencl_c_fp64 shouldn't be defined -+ #endif -+ -+ #ifdef __opencl_c_images -+ #error Feature __opencl_c_images shouldn't be defined -+ #endif -+#endif // __OPENCL_C_VERSION__ == CL_VERSION_3_0 -+ -+kernel void foo() {} -diff --git a/clang/test/Sema/pipe_builtins_feature.cl b/clang/test/Sema/pipe_builtins_feature.cl -new file mode 100644 -index 000000000000..56fa94fc7705 ---- /dev/null -+++ b/clang/test/Sema/pipe_builtins_feature.cl -@@ -0,0 +1,21 @@ -+// RUN: %clang_cc1 -cl-std=CL2.0 -fsyntax-only -verify %s -+// RUN: %clang_cc1 -cl-std=CL3.0 -fsyntax-only -verify %s -+// RUN: %clang_cc1 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -fsyntax-only -verify %s -+ -+#ifdef __opencl_c_pipes -+ #ifndef __opencl_c_generic_address_space -+ #error Generic address space feature must also be defined -+ #endif -+// CHECK: expected-no-diagnostics -+// check that pragma disable all doesn't touch feature support -+ #pragma OPENCL EXTENSION all : disable -+#endif -+ -+void test(read_only pipe int p, global int *ptr) { -+ reserve_id_t rid; -+} -+ -+#ifndef __opencl_c_pipes -+// expected-error@-5 {{expected parameter declarator}} expected-error@-5 {{expected ')'}} expected-note@-5 {{to match this '('}} -+// expected-error@-5 {{use of type 'reserve_id_t' requires __opencl_c_pipes extension to be enabled}} -+#endif -diff --git a/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl b/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl -index 5efea216346a..09aea27ae6de 100644 ---- a/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl -+++ b/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl -@@ -1,6 +1,9 @@ - // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL2.0 - // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL2.0 - // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL2.0 -+// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -+// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -+// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space - // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=clc++ - // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=clc++ - // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=clc++ -diff --git a/clang/test/SemaOpenCL/address-spaces.cl b/clang/test/SemaOpenCL/address-spaces.cl -index 07547ea19680..e367a7c57292 100644 ---- a/clang/test/SemaOpenCL/address-spaces.cl -+++ b/clang/test/SemaOpenCL/address-spaces.cl -@@ -1,5 +1,6 @@ - // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only - // RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -pedantic -fsyntax-only -+// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -verify -pedantic -fsyntax-only - // RUN: %clang_cc1 %s -cl-std=clc++ -verify -pedantic -fsyntax-only - - __constant int ci = 1; -diff --git a/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl b/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl -index f63e2913c749..727141190a0b 100644 ---- a/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl -+++ b/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl -@@ -2,6 +2,12 @@ - // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir-unknown-unknown" -verify -pedantic -fsyntax-only -DB32 -DQUALS="const volatile" - // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS= - // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS="const volatile" -+// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -DB32 -DQUALS= -+// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -DB32 -DQUALS="const volatile" -+// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir64-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS= -+// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir64-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS="const volatile" -+ -+ - - typedef struct {int a;} ndrange_t; - // Diagnostic tests for different overloads of enqueue_kernel from Table 6.13.17.1 of OpenCL 2.0 Spec. -@@ -235,11 +241,17 @@ kernel void bar(global unsigned int *buf) - kernel void foo1(global unsigned int *buf) - { - ndrange_t n; -- buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} -+ buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); -+#if __OPENCL_C_VERSION__ < 300 -+// expected-error@-2 {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} -+#endif - } - - kernel void bar1(global unsigned int *buf) - { - ndrange_t n; -- buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} -+ buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); -+#if __OPENCL_C_VERSION__ < 300 -+// expected-error@-2 {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} -+#endif - } -diff --git a/clang/test/SemaOpenCL/forget-unsupported-builtins.cl b/clang/test/SemaOpenCL/forget-unsupported-builtins.cl -new file mode 100644 -index 000000000000..14dd03e2c7db ---- /dev/null -+++ b/clang/test/SemaOpenCL/forget-unsupported-builtins.cl -@@ -0,0 +1,22 @@ -+// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -verify %s -triple spir-unknown-unknown -+// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -cl-ext=__opencl_c_pipes,__opencl_c_generic_address_space,__opencl_c_device_enqueue -verify %s -triple spir-unknown-unknown -DFEATURES -+ -+#ifndef FEATURES -+ // expected-no-diagnostics -+#else -+ // expected-error@+10 {{cannot redeclare builtin function 'get_pipe_max_packets'}} -+ // expected-note@+9 {{'get_pipe_max_packets' is a builtin with type 'unsigned int ()'}} -+ // expected-error@+9 {{cannot redeclare builtin function 'to_local'}} -+ // expected-note@+8 {{'to_local' is a builtin with type 'void *(void *)'}} -+ // expected-error@+8 {{cannot redeclare builtin function 'to_global'}} -+ // expected-note@+7 {{'to_global' is a builtin with type 'void *(void *)'}} -+ // expected-error@+7 {{cannot redeclare builtin function 'get_kernel_work_group_size'}} -+ // expected-note@+6 {{'get_kernel_work_group_size' is a builtin with type 'unsigned int ()'}} -+#endif -+ -+int get_pipe_max_packets(int); -+int to_local(int); -+int to_global(int); -+int get_kernel_work_group_size(int); -+ -+kernel void test(global int *dst) {} -diff --git a/clang/test/SemaOpenCL/image-features.cl b/clang/test/SemaOpenCL/image-features.cl -new file mode 100644 -index 000000000000..ace6913bb31e ---- /dev/null -+++ b/clang/test/SemaOpenCL/image-features.cl -@@ -0,0 +1,20 @@ -+// RUN: %clang_cc1 -cl-std=cl2.0 -fsyntax-only -verify %s -triple spir-unknown-unknown -+// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_images -fsyntax-only -verify %s -triple spir-unknown-unknown -+// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_3d_image_writes -fsyntax-only -verify %s -triple spir-unknown-unknown -+// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_read_write_images -fsyntax-only -verify %s -triple spir-unknown-unknown -+ -+#if defined(__opencl_c_read_write_images) && defined(__opencl_c_3d_image_writes) -+ // expected-no-diagnostics -+#endif -+ -+__kernel void write_3d_image(__write_only image3d_t i) {} -+ -+#ifndef __opencl_c_3d_image_writes -+ // expected-error@-3 {{use of type '__write_only image3d_t' requires __opencl_c_3d_image_writes extension to be enabled}} -+#endif -+ -+__kernel void read_write_3d_image(__read_write image3d_t i) { } -+ -+#ifndef __opencl_c_read_write_images -+ // expected-error@-3 {{use of type '__read_write image3d_t' requires __opencl_c_read_write_images extension to be enabled}} -+#endif -diff --git a/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl b/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl -index 36e76621d24a..38b0a04726e3 100644 ---- a/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl -+++ b/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl -@@ -1,4 +1,5 @@ - // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_subgroups -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_subgroups,__opencl_c_pipes - - #pragma OPENCL EXTENSION cl_khr_subgroups : enable - -diff --git a/clang/test/SemaOpenCL/storageclass-cl20.cl b/clang/test/SemaOpenCL/storageclass-cl20.cl -index 581701d2a6a5..469c526ebc30 100644 ---- a/clang/test/SemaOpenCL/storageclass-cl20.cl -+++ b/clang/test/SemaOpenCL/storageclass-cl20.cl -@@ -1,4 +1,5 @@ - // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_program_scope_global_variables,__opencl_c_generic_address_space - - int G2 = 0; - global int G3 = 0; -diff --git a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp -index 41d33b550680..f50652493e5e 100644 ---- a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp -+++ b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp -@@ -56,6 +56,7 @@ - //===----------------------------------------------------------------------===// - - #include "TableGenBackends.h" -+#include "clang/Basic/OpenCLOptions.h" - #include "llvm/ADT/MapVector.h" - #include "llvm/ADT/STLExtras.h" - #include "llvm/ADT/SmallString.h" -@@ -69,6 +70,7 @@ - #include "llvm/TableGen/Record.h" - #include "llvm/TableGen/StringMatcher.h" - #include "llvm/TableGen/TableGenBackend.h" -+#include - #include - - using namespace llvm; -@@ -228,6 +230,10 @@ private: - // The function "tan", having the same signatures, would be mapped to the - // same entry (). - MapVector SignatureListMap; -+ -+ // Encode all versions -+ unsigned short -+ EncodeBuiltinVersions(std::vector BuiltinVersionsRecords) const; - }; - } // namespace - -@@ -338,12 +344,10 @@ struct OpenCLBuiltinStruct { - const bool IsConst; - // Function attribute __attribute__((convergent)) - const bool IsConv; -+ // All opencl versions encoded -+ const unsigned char AllVersions : 5; - // OpenCL extension(s) required for this overload. - const unsigned short Extension; -- // First OpenCL version in which this overload was introduced (e.g. CL20). -- const unsigned short MinVersion; -- // First OpenCL version in which this overload was removed (e.g. CL20). -- const unsigned short MaxVersion; - }; - - )"; -@@ -503,11 +507,9 @@ void BuiltinNameEmitter::EmitBuiltinTable() { - << (Overload.first->getValueAsBit("IsPure")) << ", " - << (Overload.first->getValueAsBit("IsConst")) << ", " - << (Overload.first->getValueAsBit("IsConv")) << ", " -- << FunctionExtensionIndex[ExtName] << ", " -- << Overload.first->getValueAsDef("MinVersion")->getValueAsInt("ID") -- << ", " -- << Overload.first->getValueAsDef("MaxVersion")->getValueAsInt("ID") -- << " },\n"; -+ << EncodeBuiltinVersions( -+ Overload.first->getValueAsListOfDefs("Versions")) -+ << ", " << FunctionExtensionIndex[ExtName] << " },\n"; - Index++; - } - } -@@ -528,10 +530,8 @@ bool BuiltinNameEmitter::CanReuseSignature( - if (Rec->getValueAsBit("IsPure") == Rec2->getValueAsBit("IsPure") && - Rec->getValueAsBit("IsConst") == Rec2->getValueAsBit("IsConst") && - Rec->getValueAsBit("IsConv") == Rec2->getValueAsBit("IsConv") && -- Rec->getValueAsDef("MinVersion")->getValueAsInt("ID") == -- Rec2->getValueAsDef("MinVersion")->getValueAsInt("ID") && -- Rec->getValueAsDef("MaxVersion")->getValueAsInt("ID") == -- Rec2->getValueAsDef("MaxVersion")->getValueAsInt("ID") && -+ EncodeBuiltinVersions(Rec->getValueAsListOfDefs("Versions")) == -+ EncodeBuiltinVersions(Rec2->getValueAsListOfDefs("Versions")) && - Rec->getValueAsDef("Extension")->getName() == - Rec2->getValueAsDef("Extension")->getName()) { - return true; -@@ -806,6 +806,15 @@ static void OCL2Qual(ASTContext &Context, const OpenCLTypeStruct &Ty, - OS << "\n} // OCL2Qual\n"; - } - -+unsigned short BuiltinNameEmitter::EncodeBuiltinVersions( -+ std::vector BuiltinVersionsRecords) const { -+ return std::accumulate( -+ BuiltinVersionsRecords.begin(), BuiltinVersionsRecords.end(), -+ (unsigned short)0, [](unsigned short C, Record *R) { -+ return C |= clang::OpenCLOptions::EncodeVersion(R->getValueAsInt("ID")); -+ }); -+} -+ - void clang::EmitClangOpenCLBuiltins(RecordKeeper &Records, raw_ostream &OS) { - BuiltinNameEmitter NameChecker(Records, OS); - NameChecker.Emit(); --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-llvm-spirv-skip-building-tests.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-llvm-spirv-skip-building-tests.patch deleted file mode 100644 index 84a4ba19..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-llvm-spirv-skip-building-tests.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 661021749a168c423d69d0ba7cdfa16fed860836 Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Wed, 21 Aug 2019 14:35:31 +0800 -Subject: [PATCH 1/3] llvm-spirv: skip building tests - -Some of these need clang to be built and since we're building this in-tree, -that leads to problems when compiling libcxx, compiler-rt which aren't built -in-tree. - -Instead of using SPIRV_SKIP_CLANG_BUILD to skip clang build and adding this to -all components, disable the building of tests altogether. - -Upstream-Status: Inappropriate - -Signed-off-by: Anuj Mittal -Signed-off-by: Naveen Saini ---- - CMakeLists.txt | 10 ---------- - 1 file changed, 10 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 92c50370..80999c98 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -25,13 +25,6 @@ if(LLVM_SPIRV_BUILD_EXTERNAL) - set(CMAKE_CXX_STANDARD 14) - set(CMAKE_CXX_STANDARD_REQUIRED ON) - -- if(LLVM_SPIRV_INCLUDE_TESTS) -- set(LLVM_TEST_COMPONENTS -- llvm-as -- llvm-dis -- ) -- endif(LLVM_SPIRV_INCLUDE_TESTS) -- - find_package(LLVM 10.0.0 REQUIRED - COMPONENTS - Analysis -@@ -63,9 +56,6 @@ set(LLVM_SPIRV_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) - - add_subdirectory(lib/SPIRV) - add_subdirectory(tools/llvm-spirv) --if(LLVM_SPIRV_INCLUDE_TESTS) -- add_subdirectory(test) --endif(LLVM_SPIRV_INCLUDE_TESTS) - - install( - FILES --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch deleted file mode 100644 index 3f1b24e7..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch +++ /dev/null @@ -1,812 +0,0 @@ -From 3f544cfe44ee5f113a3fb554aca2cf5d64996062 Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Wed, 7 Apr 2021 16:38:38 +0800 -Subject: [PATCH 2/7] Add cl_khr_extended_subgroup extensions. - -Added extensions and their function declarations into -the standard header. - -Patch by Piotr Fusik! - -Tags: #clang - -Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/4a4402f0d72167477a6252e4c3daf5089ebc8f9a] -Signed-off-by: Anastasia Stulova -Signed-off-by: Naveen Saini ---- - .../include/clang/Basic/OpenCLExtensions.def | 7 + - clang/lib/Headers/opencl-c.h | 668 ++++++++++++++++++ - clang/test/SemaOpenCL/extension-version.cl | 83 +++ - 3 files changed, 758 insertions(+) - -diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def -index 608f78a13eef..d1574164f9b2 100644 ---- a/clang/include/clang/Basic/OpenCLExtensions.def -+++ b/clang/include/clang/Basic/OpenCLExtensions.def -@@ -74,6 +74,13 @@ OPENCLEXT_INTERNAL(cl_khr_mipmap_image_writes, 200, ~0U) - OPENCLEXT_INTERNAL(cl_khr_srgb_image_writes, 200, ~0U) - OPENCLEXT_INTERNAL(cl_khr_subgroups, 200, ~0U) - OPENCLEXT_INTERNAL(cl_khr_terminate_context, 200, ~0U) -+OPENCLEXT_INTERNAL(cl_khr_subgroup_extended_types, 200, ~0U) -+OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_vote, 200, ~0U) -+OPENCLEXT_INTERNAL(cl_khr_subgroup_ballot, 200, ~0U) -+OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_arithmetic, 200, ~0U) -+OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle, 200, ~0U) -+OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle_relative, 200, ~0U) -+OPENCLEXT_INTERNAL(cl_khr_subgroup_clustered_reduce, 200, ~0U) - - // Clang Extensions. - OPENCLEXT_INTERNAL(cl_clang_storage_class_specifiers, 100, ~0U) -diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h -index 93a946cec5b1..67d900eb1c3d 100644 ---- a/clang/lib/Headers/opencl-c.h -+++ b/clang/lib/Headers/opencl-c.h -@@ -17530,6 +17530,674 @@ double __ovld __conv sub_group_scan_inclusive_max(double x); - - #endif //cl_khr_subgroups cl_intel_subgroups - -+#if defined(cl_khr_subgroup_extended_types) -+char __ovld __conv sub_group_broadcast( char value, uint index ); -+char2 __ovld __conv sub_group_broadcast( char2 value, uint index ); -+char3 __ovld __conv sub_group_broadcast( char3 value, uint index ); -+char4 __ovld __conv sub_group_broadcast( char4 value, uint index ); -+char8 __ovld __conv sub_group_broadcast( char8 value, uint index ); -+char16 __ovld __conv sub_group_broadcast( char16 value, uint index ); -+ -+uchar __ovld __conv sub_group_broadcast( uchar value, uint index ); -+uchar2 __ovld __conv sub_group_broadcast( uchar2 value, uint index ); -+uchar3 __ovld __conv sub_group_broadcast( uchar3 value, uint index ); -+uchar4 __ovld __conv sub_group_broadcast( uchar4 value, uint index ); -+uchar8 __ovld __conv sub_group_broadcast( uchar8 value, uint index ); -+uchar16 __ovld __conv sub_group_broadcast( uchar16 value, uint index ); -+ -+short __ovld __conv sub_group_broadcast( short value, uint index ); -+short2 __ovld __conv sub_group_broadcast( short2 value, uint index ); -+short3 __ovld __conv sub_group_broadcast( short3 value, uint index ); -+short4 __ovld __conv sub_group_broadcast( short4 value, uint index ); -+short8 __ovld __conv sub_group_broadcast( short8 value, uint index ); -+short16 __ovld __conv sub_group_broadcast( short16 value, uint index ); -+ -+ushort __ovld __conv sub_group_broadcast( ushort value, uint index ); -+ushort2 __ovld __conv sub_group_broadcast( ushort2 value, uint index ); -+ushort3 __ovld __conv sub_group_broadcast( ushort3 value, uint index ); -+ushort4 __ovld __conv sub_group_broadcast( ushort4 value, uint index ); -+ushort8 __ovld __conv sub_group_broadcast( ushort8 value, uint index ); -+ushort16 __ovld __conv sub_group_broadcast( ushort16 value, uint index ); -+ -+// scalar int broadcast is part of cl_khr_subgroups -+int2 __ovld __conv sub_group_broadcast( int2 value, uint index ); -+int3 __ovld __conv sub_group_broadcast( int3 value, uint index ); -+int4 __ovld __conv sub_group_broadcast( int4 value, uint index ); -+int8 __ovld __conv sub_group_broadcast( int8 value, uint index ); -+int16 __ovld __conv sub_group_broadcast( int16 value, uint index ); -+ -+// scalar uint broadcast is part of cl_khr_subgroups -+uint2 __ovld __conv sub_group_broadcast( uint2 value, uint index ); -+uint3 __ovld __conv sub_group_broadcast( uint3 value, uint index ); -+uint4 __ovld __conv sub_group_broadcast( uint4 value, uint index ); -+uint8 __ovld __conv sub_group_broadcast( uint8 value, uint index ); -+uint16 __ovld __conv sub_group_broadcast( uint16 value, uint index ); -+ -+// scalar long broadcast is part of cl_khr_subgroups -+long2 __ovld __conv sub_group_broadcast( long2 value, uint index ); -+long3 __ovld __conv sub_group_broadcast( long3 value, uint index ); -+long4 __ovld __conv sub_group_broadcast( long4 value, uint index ); -+long8 __ovld __conv sub_group_broadcast( long8 value, uint index ); -+long16 __ovld __conv sub_group_broadcast( long16 value, uint index ); -+ -+// scalar ulong broadcast is part of cl_khr_subgroups -+ulong2 __ovld __conv sub_group_broadcast( ulong2 value, uint index ); -+ulong3 __ovld __conv sub_group_broadcast( ulong3 value, uint index ); -+ulong4 __ovld __conv sub_group_broadcast( ulong4 value, uint index ); -+ulong8 __ovld __conv sub_group_broadcast( ulong8 value, uint index ); -+ulong16 __ovld __conv sub_group_broadcast( ulong16 value, uint index ); -+ -+// scalar float broadcast is part of cl_khr_subgroups -+float2 __ovld __conv sub_group_broadcast( float2 value, uint index ); -+float3 __ovld __conv sub_group_broadcast( float3 value, uint index ); -+float4 __ovld __conv sub_group_broadcast( float4 value, uint index ); -+float8 __ovld __conv sub_group_broadcast( float8 value, uint index ); -+float16 __ovld __conv sub_group_broadcast( float16 value, uint index ); -+ -+char __ovld __conv sub_group_reduce_add( char value ); -+uchar __ovld __conv sub_group_reduce_add( uchar value ); -+short __ovld __conv sub_group_reduce_add( short value ); -+ushort __ovld __conv sub_group_reduce_add( ushort value ); -+ -+char __ovld __conv sub_group_reduce_min( char value ); -+uchar __ovld __conv sub_group_reduce_min( uchar value ); -+short __ovld __conv sub_group_reduce_min( short value ); -+ushort __ovld __conv sub_group_reduce_min( ushort value ); -+ -+char __ovld __conv sub_group_reduce_max( char value ); -+uchar __ovld __conv sub_group_reduce_max( uchar value ); -+short __ovld __conv sub_group_reduce_max( short value ); -+ushort __ovld __conv sub_group_reduce_max( ushort value ); -+ -+char __ovld __conv sub_group_scan_inclusive_add( char value ); -+uchar __ovld __conv sub_group_scan_inclusive_add( uchar value ); -+short __ovld __conv sub_group_scan_inclusive_add( short value ); -+ushort __ovld __conv sub_group_scan_inclusive_add( ushort value ); -+ -+char __ovld __conv sub_group_scan_inclusive_min( char value ); -+uchar __ovld __conv sub_group_scan_inclusive_min( uchar value ); -+short __ovld __conv sub_group_scan_inclusive_min( short value ); -+ushort __ovld __conv sub_group_scan_inclusive_min( ushort value ); -+ -+char __ovld __conv sub_group_scan_inclusive_max( char value ); -+uchar __ovld __conv sub_group_scan_inclusive_max( uchar value ); -+short __ovld __conv sub_group_scan_inclusive_max( short value ); -+ushort __ovld __conv sub_group_scan_inclusive_max( ushort value ); -+ -+char __ovld __conv sub_group_scan_exclusive_add( char value ); -+uchar __ovld __conv sub_group_scan_exclusive_add( uchar value ); -+short __ovld __conv sub_group_scan_exclusive_add( short value ); -+ushort __ovld __conv sub_group_scan_exclusive_add( ushort value ); -+ -+char __ovld __conv sub_group_scan_exclusive_min( char value ); -+uchar __ovld __conv sub_group_scan_exclusive_min( uchar value ); -+short __ovld __conv sub_group_scan_exclusive_min( short value ); -+ushort __ovld __conv sub_group_scan_exclusive_min( ushort value ); -+ -+char __ovld __conv sub_group_scan_exclusive_max( char value ); -+uchar __ovld __conv sub_group_scan_exclusive_max( uchar value ); -+short __ovld __conv sub_group_scan_exclusive_max( short value ); -+ushort __ovld __conv sub_group_scan_exclusive_max( ushort value ); -+ -+#if defined(cl_khr_fp16) -+// scalar half broadcast is part of cl_khr_subgroups -+half2 __ovld __conv sub_group_broadcast( half2 value, uint index ); -+half3 __ovld __conv sub_group_broadcast( half3 value, uint index ); -+half4 __ovld __conv sub_group_broadcast( half4 value, uint index ); -+half8 __ovld __conv sub_group_broadcast( half8 value, uint index ); -+half16 __ovld __conv sub_group_broadcast( half16 value, uint index ); -+#endif // cl_khr_fp16 -+ -+#if defined(cl_khr_fp64) -+// scalar double broadcast is part of cl_khr_subgroups -+double2 __ovld __conv sub_group_broadcast( double2 value, uint index ); -+double3 __ovld __conv sub_group_broadcast( double3 value, uint index ); -+double4 __ovld __conv sub_group_broadcast( double4 value, uint index ); -+double8 __ovld __conv sub_group_broadcast( double8 value, uint index ); -+double16 __ovld __conv sub_group_broadcast( double16 value, uint index ); -+#endif // cl_khr_fp64 -+ -+#endif // cl_khr_subgroup_extended_types -+ -+#if defined(cl_khr_subgroup_non_uniform_vote) -+int __ovld sub_group_elect(void); -+int __ovld sub_group_non_uniform_all( int predicate ); -+int __ovld sub_group_non_uniform_any( int predicate ); -+ -+int __ovld sub_group_non_uniform_all_equal( char value ); -+int __ovld sub_group_non_uniform_all_equal( uchar value ); -+int __ovld sub_group_non_uniform_all_equal( short value ); -+int __ovld sub_group_non_uniform_all_equal( ushort value ); -+int __ovld sub_group_non_uniform_all_equal( int value ); -+int __ovld sub_group_non_uniform_all_equal( uint value ); -+int __ovld sub_group_non_uniform_all_equal( long value ); -+int __ovld sub_group_non_uniform_all_equal( ulong value ); -+int __ovld sub_group_non_uniform_all_equal( float value ); -+ -+#if defined(cl_khr_fp16) -+int __ovld sub_group_non_uniform_all_equal( half value ); -+#endif // cl_khr_fp16 -+ -+#if defined(cl_khr_fp64) -+int __ovld sub_group_non_uniform_all_equal( double value ); -+#endif // cl_khr_fp64 -+ -+#endif // cl_khr_subgroup_non_uniform_vote -+ -+#if defined(cl_khr_subgroup_ballot) -+char __ovld sub_group_non_uniform_broadcast( char value, uint index ); -+char2 __ovld sub_group_non_uniform_broadcast( char2 value, uint index ); -+char3 __ovld sub_group_non_uniform_broadcast( char3 value, uint index ); -+char4 __ovld sub_group_non_uniform_broadcast( char4 value, uint index ); -+char8 __ovld sub_group_non_uniform_broadcast( char8 value, uint index ); -+char16 __ovld sub_group_non_uniform_broadcast( char16 value, uint index ); -+ -+uchar __ovld sub_group_non_uniform_broadcast( uchar value, uint index ); -+uchar2 __ovld sub_group_non_uniform_broadcast( uchar2 value, uint index ); -+uchar3 __ovld sub_group_non_uniform_broadcast( uchar3 value, uint index ); -+uchar4 __ovld sub_group_non_uniform_broadcast( uchar4 value, uint index ); -+uchar8 __ovld sub_group_non_uniform_broadcast( uchar8 value, uint index ); -+uchar16 __ovld sub_group_non_uniform_broadcast( uchar16 value, uint index ); -+ -+short __ovld sub_group_non_uniform_broadcast( short value, uint index ); -+short2 __ovld sub_group_non_uniform_broadcast( short2 value, uint index ); -+short3 __ovld sub_group_non_uniform_broadcast( short3 value, uint index ); -+short4 __ovld sub_group_non_uniform_broadcast( short4 value, uint index ); -+short8 __ovld sub_group_non_uniform_broadcast( short8 value, uint index ); -+short16 __ovld sub_group_non_uniform_broadcast( short16 value, uint index ); -+ -+ushort __ovld sub_group_non_uniform_broadcast( ushort value, uint index ); -+ushort2 __ovld sub_group_non_uniform_broadcast( ushort2 value, uint index ); -+ushort3 __ovld sub_group_non_uniform_broadcast( ushort3 value, uint index ); -+ushort4 __ovld sub_group_non_uniform_broadcast( ushort4 value, uint index ); -+ushort8 __ovld sub_group_non_uniform_broadcast( ushort8 value, uint index ); -+ushort16 __ovld sub_group_non_uniform_broadcast( ushort16 value, uint index ); -+ -+int __ovld sub_group_non_uniform_broadcast( int value, uint index ); -+int2 __ovld sub_group_non_uniform_broadcast( int2 value, uint index ); -+int3 __ovld sub_group_non_uniform_broadcast( int3 value, uint index ); -+int4 __ovld sub_group_non_uniform_broadcast( int4 value, uint index ); -+int8 __ovld sub_group_non_uniform_broadcast( int8 value, uint index ); -+int16 __ovld sub_group_non_uniform_broadcast( int16 value, uint index ); -+ -+uint __ovld sub_group_non_uniform_broadcast( uint value, uint index ); -+uint2 __ovld sub_group_non_uniform_broadcast( uint2 value, uint index ); -+uint3 __ovld sub_group_non_uniform_broadcast( uint3 value, uint index ); -+uint4 __ovld sub_group_non_uniform_broadcast( uint4 value, uint index ); -+uint8 __ovld sub_group_non_uniform_broadcast( uint8 value, uint index ); -+uint16 __ovld sub_group_non_uniform_broadcast( uint16 value, uint index ); -+ -+long __ovld sub_group_non_uniform_broadcast( long value, uint index ); -+long2 __ovld sub_group_non_uniform_broadcast( long2 value, uint index ); -+long3 __ovld sub_group_non_uniform_broadcast( long3 value, uint index ); -+long4 __ovld sub_group_non_uniform_broadcast( long4 value, uint index ); -+long8 __ovld sub_group_non_uniform_broadcast( long8 value, uint index ); -+long16 __ovld sub_group_non_uniform_broadcast( long16 value, uint index ); -+ -+ulong __ovld sub_group_non_uniform_broadcast( ulong value, uint index ); -+ulong2 __ovld sub_group_non_uniform_broadcast( ulong2 value, uint index ); -+ulong3 __ovld sub_group_non_uniform_broadcast( ulong3 value, uint index ); -+ulong4 __ovld sub_group_non_uniform_broadcast( ulong4 value, uint index ); -+ulong8 __ovld sub_group_non_uniform_broadcast( ulong8 value, uint index ); -+ulong16 __ovld sub_group_non_uniform_broadcast( ulong16 value, uint index ); -+ -+float __ovld sub_group_non_uniform_broadcast( float value, uint index ); -+float2 __ovld sub_group_non_uniform_broadcast( float2 value, uint index ); -+float3 __ovld sub_group_non_uniform_broadcast( float3 value, uint index ); -+float4 __ovld sub_group_non_uniform_broadcast( float4 value, uint index ); -+float8 __ovld sub_group_non_uniform_broadcast( float8 value, uint index ); -+float16 __ovld sub_group_non_uniform_broadcast( float16 value, uint index ); -+ -+char __ovld sub_group_broadcast_first( char value ); -+uchar __ovld sub_group_broadcast_first( uchar value ); -+short __ovld sub_group_broadcast_first( short value ); -+ushort __ovld sub_group_broadcast_first( ushort value ); -+int __ovld sub_group_broadcast_first( int value ); -+uint __ovld sub_group_broadcast_first( uint value ); -+long __ovld sub_group_broadcast_first( long value ); -+ulong __ovld sub_group_broadcast_first( ulong value ); -+float __ovld sub_group_broadcast_first( float value ); -+ -+uint4 __ovld sub_group_ballot( int predicate ); -+int __ovld __cnfn sub_group_inverse_ballot( uint4 value ); -+int __ovld __cnfn sub_group_ballot_bit_extract( uint4 value, uint index ); -+uint __ovld __cnfn sub_group_ballot_bit_count( uint4 value ); -+ -+uint __ovld sub_group_ballot_inclusive_scan( uint4 value ); -+uint __ovld sub_group_ballot_exclusive_scan( uint4 value ); -+uint __ovld sub_group_ballot_find_lsb( uint4 value ); -+uint __ovld sub_group_ballot_find_msb( uint4 value ); -+ -+uint4 __ovld __cnfn get_sub_group_eq_mask(void); -+uint4 __ovld __cnfn get_sub_group_ge_mask(void); -+uint4 __ovld __cnfn get_sub_group_gt_mask(void); -+uint4 __ovld __cnfn get_sub_group_le_mask(void); -+uint4 __ovld __cnfn get_sub_group_lt_mask(void); -+ -+#if defined(cl_khr_fp16) -+half __ovld sub_group_non_uniform_broadcast( half value, uint index ); -+half2 __ovld sub_group_non_uniform_broadcast( half2 value, uint index ); -+half3 __ovld sub_group_non_uniform_broadcast( half3 value, uint index ); -+half4 __ovld sub_group_non_uniform_broadcast( half4 value, uint index ); -+half8 __ovld sub_group_non_uniform_broadcast( half8 value, uint index ); -+half16 __ovld sub_group_non_uniform_broadcast( half16 value, uint index ); -+ -+half __ovld sub_group_broadcast_first( half value ); -+#endif // cl_khr_fp16 -+ -+#if defined(cl_khr_fp64) -+double __ovld sub_group_non_uniform_broadcast( double value, uint index ); -+double2 __ovld sub_group_non_uniform_broadcast( double2 value, uint index ); -+double3 __ovld sub_group_non_uniform_broadcast( double3 value, uint index ); -+double4 __ovld sub_group_non_uniform_broadcast( double4 value, uint index ); -+double8 __ovld sub_group_non_uniform_broadcast( double8 value, uint index ); -+double16 __ovld sub_group_non_uniform_broadcast( double16 value, uint index ); -+ -+double __ovld sub_group_broadcast_first( double value ); -+#endif // cl_khr_fp64 -+ -+#endif // cl_khr_subgroup_ballot -+ -+#if defined(cl_khr_subgroup_non_uniform_arithmetic) -+char __ovld sub_group_non_uniform_reduce_add( char value ); -+uchar __ovld sub_group_non_uniform_reduce_add( uchar value ); -+short __ovld sub_group_non_uniform_reduce_add( short value ); -+ushort __ovld sub_group_non_uniform_reduce_add( ushort value ); -+int __ovld sub_group_non_uniform_reduce_add( int value ); -+uint __ovld sub_group_non_uniform_reduce_add( uint value ); -+long __ovld sub_group_non_uniform_reduce_add( long value ); -+ulong __ovld sub_group_non_uniform_reduce_add( ulong value ); -+float __ovld sub_group_non_uniform_reduce_add( float value ); -+ -+char __ovld sub_group_non_uniform_reduce_mul( char value ); -+uchar __ovld sub_group_non_uniform_reduce_mul( uchar value ); -+short __ovld sub_group_non_uniform_reduce_mul( short value ); -+ushort __ovld sub_group_non_uniform_reduce_mul( ushort value ); -+int __ovld sub_group_non_uniform_reduce_mul( int value ); -+uint __ovld sub_group_non_uniform_reduce_mul( uint value ); -+long __ovld sub_group_non_uniform_reduce_mul( long value ); -+ulong __ovld sub_group_non_uniform_reduce_mul( ulong value ); -+float __ovld sub_group_non_uniform_reduce_mul( float value ); -+ -+char __ovld sub_group_non_uniform_reduce_min( char value ); -+uchar __ovld sub_group_non_uniform_reduce_min( uchar value ); -+short __ovld sub_group_non_uniform_reduce_min( short value ); -+ushort __ovld sub_group_non_uniform_reduce_min( ushort value ); -+int __ovld sub_group_non_uniform_reduce_min( int value ); -+uint __ovld sub_group_non_uniform_reduce_min( uint value ); -+long __ovld sub_group_non_uniform_reduce_min( long value ); -+ulong __ovld sub_group_non_uniform_reduce_min( ulong value ); -+float __ovld sub_group_non_uniform_reduce_min( float value ); -+ -+char __ovld sub_group_non_uniform_reduce_max( char value ); -+uchar __ovld sub_group_non_uniform_reduce_max( uchar value ); -+short __ovld sub_group_non_uniform_reduce_max( short value ); -+ushort __ovld sub_group_non_uniform_reduce_max( ushort value ); -+int __ovld sub_group_non_uniform_reduce_max( int value ); -+uint __ovld sub_group_non_uniform_reduce_max( uint value ); -+long __ovld sub_group_non_uniform_reduce_max( long value ); -+ulong __ovld sub_group_non_uniform_reduce_max( ulong value ); -+float __ovld sub_group_non_uniform_reduce_max( float value ); -+ -+char __ovld sub_group_non_uniform_scan_inclusive_add( char value ); -+uchar __ovld sub_group_non_uniform_scan_inclusive_add( uchar value ); -+short __ovld sub_group_non_uniform_scan_inclusive_add( short value ); -+ushort __ovld sub_group_non_uniform_scan_inclusive_add( ushort value ); -+int __ovld sub_group_non_uniform_scan_inclusive_add( int value ); -+uint __ovld sub_group_non_uniform_scan_inclusive_add( uint value ); -+long __ovld sub_group_non_uniform_scan_inclusive_add( long value ); -+ulong __ovld sub_group_non_uniform_scan_inclusive_add( ulong value ); -+float __ovld sub_group_non_uniform_scan_inclusive_add( float value ); -+ -+char __ovld sub_group_non_uniform_scan_inclusive_mul( char value ); -+uchar __ovld sub_group_non_uniform_scan_inclusive_mul( uchar value ); -+short __ovld sub_group_non_uniform_scan_inclusive_mul( short value ); -+ushort __ovld sub_group_non_uniform_scan_inclusive_mul( ushort value ); -+int __ovld sub_group_non_uniform_scan_inclusive_mul( int value ); -+uint __ovld sub_group_non_uniform_scan_inclusive_mul( uint value ); -+long __ovld sub_group_non_uniform_scan_inclusive_mul( long value ); -+ulong __ovld sub_group_non_uniform_scan_inclusive_mul( ulong value ); -+float __ovld sub_group_non_uniform_scan_inclusive_mul( float value ); -+ -+char __ovld sub_group_non_uniform_scan_inclusive_min( char value ); -+uchar __ovld sub_group_non_uniform_scan_inclusive_min( uchar value ); -+short __ovld sub_group_non_uniform_scan_inclusive_min( short value ); -+ushort __ovld sub_group_non_uniform_scan_inclusive_min( ushort value ); -+int __ovld sub_group_non_uniform_scan_inclusive_min( int value ); -+uint __ovld sub_group_non_uniform_scan_inclusive_min( uint value ); -+long __ovld sub_group_non_uniform_scan_inclusive_min( long value ); -+ulong __ovld sub_group_non_uniform_scan_inclusive_min( ulong value ); -+float __ovld sub_group_non_uniform_scan_inclusive_min( float value ); -+ -+char __ovld sub_group_non_uniform_scan_inclusive_max( char value ); -+uchar __ovld sub_group_non_uniform_scan_inclusive_max( uchar value ); -+short __ovld sub_group_non_uniform_scan_inclusive_max( short value ); -+ushort __ovld sub_group_non_uniform_scan_inclusive_max( ushort value ); -+int __ovld sub_group_non_uniform_scan_inclusive_max( int value ); -+uint __ovld sub_group_non_uniform_scan_inclusive_max( uint value ); -+long __ovld sub_group_non_uniform_scan_inclusive_max( long value ); -+ulong __ovld sub_group_non_uniform_scan_inclusive_max( ulong value ); -+float __ovld sub_group_non_uniform_scan_inclusive_max( float value ); -+ -+char __ovld sub_group_non_uniform_scan_exclusive_add( char value ); -+uchar __ovld sub_group_non_uniform_scan_exclusive_add( uchar value ); -+short __ovld sub_group_non_uniform_scan_exclusive_add( short value ); -+ushort __ovld sub_group_non_uniform_scan_exclusive_add( ushort value ); -+int __ovld sub_group_non_uniform_scan_exclusive_add( int value ); -+uint __ovld sub_group_non_uniform_scan_exclusive_add( uint value ); -+long __ovld sub_group_non_uniform_scan_exclusive_add( long value ); -+ulong __ovld sub_group_non_uniform_scan_exclusive_add( ulong value ); -+float __ovld sub_group_non_uniform_scan_exclusive_add( float value ); -+ -+char __ovld sub_group_non_uniform_scan_exclusive_mul( char value ); -+uchar __ovld sub_group_non_uniform_scan_exclusive_mul( uchar value ); -+short __ovld sub_group_non_uniform_scan_exclusive_mul( short value ); -+ushort __ovld sub_group_non_uniform_scan_exclusive_mul( ushort value ); -+int __ovld sub_group_non_uniform_scan_exclusive_mul( int value ); -+uint __ovld sub_group_non_uniform_scan_exclusive_mul( uint value ); -+long __ovld sub_group_non_uniform_scan_exclusive_mul( long value ); -+ulong __ovld sub_group_non_uniform_scan_exclusive_mul( ulong value ); -+float __ovld sub_group_non_uniform_scan_exclusive_mul( float value ); -+ -+char __ovld sub_group_non_uniform_scan_exclusive_min( char value ); -+uchar __ovld sub_group_non_uniform_scan_exclusive_min( uchar value ); -+short __ovld sub_group_non_uniform_scan_exclusive_min( short value ); -+ushort __ovld sub_group_non_uniform_scan_exclusive_min( ushort value ); -+int __ovld sub_group_non_uniform_scan_exclusive_min( int value ); -+uint __ovld sub_group_non_uniform_scan_exclusive_min( uint value ); -+long __ovld sub_group_non_uniform_scan_exclusive_min( long value ); -+ulong __ovld sub_group_non_uniform_scan_exclusive_min( ulong value ); -+float __ovld sub_group_non_uniform_scan_exclusive_min( float value ); -+ -+char __ovld sub_group_non_uniform_scan_exclusive_max( char value ); -+uchar __ovld sub_group_non_uniform_scan_exclusive_max( uchar value ); -+short __ovld sub_group_non_uniform_scan_exclusive_max( short value ); -+ushort __ovld sub_group_non_uniform_scan_exclusive_max( ushort value ); -+int __ovld sub_group_non_uniform_scan_exclusive_max( int value ); -+uint __ovld sub_group_non_uniform_scan_exclusive_max( uint value ); -+long __ovld sub_group_non_uniform_scan_exclusive_max( long value ); -+ulong __ovld sub_group_non_uniform_scan_exclusive_max( ulong value ); -+float __ovld sub_group_non_uniform_scan_exclusive_max( float value ); -+ -+char __ovld sub_group_non_uniform_reduce_and( char value ); -+uchar __ovld sub_group_non_uniform_reduce_and( uchar value ); -+short __ovld sub_group_non_uniform_reduce_and( short value ); -+ushort __ovld sub_group_non_uniform_reduce_and( ushort value ); -+int __ovld sub_group_non_uniform_reduce_and( int value ); -+uint __ovld sub_group_non_uniform_reduce_and( uint value ); -+long __ovld sub_group_non_uniform_reduce_and( long value ); -+ulong __ovld sub_group_non_uniform_reduce_and( ulong value ); -+ -+char __ovld sub_group_non_uniform_reduce_or( char value ); -+uchar __ovld sub_group_non_uniform_reduce_or( uchar value ); -+short __ovld sub_group_non_uniform_reduce_or( short value ); -+ushort __ovld sub_group_non_uniform_reduce_or( ushort value ); -+int __ovld sub_group_non_uniform_reduce_or( int value ); -+uint __ovld sub_group_non_uniform_reduce_or( uint value ); -+long __ovld sub_group_non_uniform_reduce_or( long value ); -+ulong __ovld sub_group_non_uniform_reduce_or( ulong value ); -+ -+char __ovld sub_group_non_uniform_reduce_xor( char value ); -+uchar __ovld sub_group_non_uniform_reduce_xor( uchar value ); -+short __ovld sub_group_non_uniform_reduce_xor( short value ); -+ushort __ovld sub_group_non_uniform_reduce_xor( ushort value ); -+int __ovld sub_group_non_uniform_reduce_xor( int value ); -+uint __ovld sub_group_non_uniform_reduce_xor( uint value ); -+long __ovld sub_group_non_uniform_reduce_xor( long value ); -+ulong __ovld sub_group_non_uniform_reduce_xor( ulong value ); -+ -+char __ovld sub_group_non_uniform_scan_inclusive_and( char value ); -+uchar __ovld sub_group_non_uniform_scan_inclusive_and( uchar value ); -+short __ovld sub_group_non_uniform_scan_inclusive_and( short value ); -+ushort __ovld sub_group_non_uniform_scan_inclusive_and( ushort value ); -+int __ovld sub_group_non_uniform_scan_inclusive_and( int value ); -+uint __ovld sub_group_non_uniform_scan_inclusive_and( uint value ); -+long __ovld sub_group_non_uniform_scan_inclusive_and( long value ); -+ulong __ovld sub_group_non_uniform_scan_inclusive_and( ulong value ); -+ -+char __ovld sub_group_non_uniform_scan_inclusive_or( char value ); -+uchar __ovld sub_group_non_uniform_scan_inclusive_or( uchar value ); -+short __ovld sub_group_non_uniform_scan_inclusive_or( short value ); -+ushort __ovld sub_group_non_uniform_scan_inclusive_or( ushort value ); -+int __ovld sub_group_non_uniform_scan_inclusive_or( int value ); -+uint __ovld sub_group_non_uniform_scan_inclusive_or( uint value ); -+long __ovld sub_group_non_uniform_scan_inclusive_or( long value ); -+ulong __ovld sub_group_non_uniform_scan_inclusive_or( ulong value ); -+ -+char __ovld sub_group_non_uniform_scan_inclusive_xor( char value ); -+uchar __ovld sub_group_non_uniform_scan_inclusive_xor( uchar value ); -+short __ovld sub_group_non_uniform_scan_inclusive_xor( short value ); -+ushort __ovld sub_group_non_uniform_scan_inclusive_xor( ushort value ); -+int __ovld sub_group_non_uniform_scan_inclusive_xor( int value ); -+uint __ovld sub_group_non_uniform_scan_inclusive_xor( uint value ); -+long __ovld sub_group_non_uniform_scan_inclusive_xor( long value ); -+ulong __ovld sub_group_non_uniform_scan_inclusive_xor( ulong value ); -+ -+char __ovld sub_group_non_uniform_scan_exclusive_and( char value ); -+uchar __ovld sub_group_non_uniform_scan_exclusive_and( uchar value ); -+short __ovld sub_group_non_uniform_scan_exclusive_and( short value ); -+ushort __ovld sub_group_non_uniform_scan_exclusive_and( ushort value ); -+int __ovld sub_group_non_uniform_scan_exclusive_and( int value ); -+uint __ovld sub_group_non_uniform_scan_exclusive_and( uint value ); -+long __ovld sub_group_non_uniform_scan_exclusive_and( long value ); -+ulong __ovld sub_group_non_uniform_scan_exclusive_and( ulong value ); -+ -+char __ovld sub_group_non_uniform_scan_exclusive_or( char value ); -+uchar __ovld sub_group_non_uniform_scan_exclusive_or( uchar value ); -+short __ovld sub_group_non_uniform_scan_exclusive_or( short value ); -+ushort __ovld sub_group_non_uniform_scan_exclusive_or( ushort value ); -+int __ovld sub_group_non_uniform_scan_exclusive_or( int value ); -+uint __ovld sub_group_non_uniform_scan_exclusive_or( uint value ); -+long __ovld sub_group_non_uniform_scan_exclusive_or( long value ); -+ulong __ovld sub_group_non_uniform_scan_exclusive_or( ulong value ); -+ -+char __ovld sub_group_non_uniform_scan_exclusive_xor( char value ); -+uchar __ovld sub_group_non_uniform_scan_exclusive_xor( uchar value ); -+short __ovld sub_group_non_uniform_scan_exclusive_xor( short value ); -+ushort __ovld sub_group_non_uniform_scan_exclusive_xor( ushort value ); -+int __ovld sub_group_non_uniform_scan_exclusive_xor( int value ); -+uint __ovld sub_group_non_uniform_scan_exclusive_xor( uint value ); -+long __ovld sub_group_non_uniform_scan_exclusive_xor( long value ); -+ulong __ovld sub_group_non_uniform_scan_exclusive_xor( ulong value ); -+ -+int __ovld sub_group_non_uniform_reduce_logical_and( int predicate ); -+int __ovld sub_group_non_uniform_reduce_logical_or( int predicate ); -+int __ovld sub_group_non_uniform_reduce_logical_xor( int predicate ); -+ -+int __ovld sub_group_non_uniform_scan_inclusive_logical_and( int predicate ); -+int __ovld sub_group_non_uniform_scan_inclusive_logical_or( int predicate ); -+int __ovld sub_group_non_uniform_scan_inclusive_logical_xor( int predicate ); -+ -+int __ovld sub_group_non_uniform_scan_exclusive_logical_and( int predicate ); -+int __ovld sub_group_non_uniform_scan_exclusive_logical_or( int predicate ); -+int __ovld sub_group_non_uniform_scan_exclusive_logical_xor( int predicate ); -+ -+#if defined(cl_khr_fp16) -+half __ovld sub_group_non_uniform_reduce_add( half value ); -+half __ovld sub_group_non_uniform_reduce_mul( half value ); -+half __ovld sub_group_non_uniform_reduce_min( half value ); -+half __ovld sub_group_non_uniform_reduce_max( half value ); -+half __ovld sub_group_non_uniform_scan_inclusive_add( half value ); -+half __ovld sub_group_non_uniform_scan_inclusive_mul( half value ); -+half __ovld sub_group_non_uniform_scan_inclusive_min( half value ); -+half __ovld sub_group_non_uniform_scan_inclusive_max( half value ); -+half __ovld sub_group_non_uniform_scan_exclusive_add( half value ); -+half __ovld sub_group_non_uniform_scan_exclusive_mul( half value ); -+half __ovld sub_group_non_uniform_scan_exclusive_min( half value ); -+half __ovld sub_group_non_uniform_scan_exclusive_max( half value ); -+#endif // cl_khr_fp16 -+ -+#if defined(cl_khr_fp64) -+double __ovld sub_group_non_uniform_reduce_add( double value ); -+double __ovld sub_group_non_uniform_reduce_mul( double value ); -+double __ovld sub_group_non_uniform_reduce_min( double value ); -+double __ovld sub_group_non_uniform_reduce_max( double value ); -+double __ovld sub_group_non_uniform_scan_inclusive_add( double value ); -+double __ovld sub_group_non_uniform_scan_inclusive_mul( double value ); -+double __ovld sub_group_non_uniform_scan_inclusive_min( double value ); -+double __ovld sub_group_non_uniform_scan_inclusive_max( double value ); -+double __ovld sub_group_non_uniform_scan_exclusive_add( double value ); -+double __ovld sub_group_non_uniform_scan_exclusive_mul( double value ); -+double __ovld sub_group_non_uniform_scan_exclusive_min( double value ); -+double __ovld sub_group_non_uniform_scan_exclusive_max( double value ); -+#endif // cl_khr_fp64 -+ -+#endif // cl_khr_subgroup_non_uniform_arithmetic -+ -+#if defined(cl_khr_subgroup_shuffle) -+char __ovld sub_group_shuffle( char value, uint index ); -+uchar __ovld sub_group_shuffle( uchar value, uint index ); -+short __ovld sub_group_shuffle( short value, uint index ); -+ushort __ovld sub_group_shuffle( ushort value, uint index ); -+int __ovld sub_group_shuffle( int value, uint index ); -+uint __ovld sub_group_shuffle( uint value, uint index ); -+long __ovld sub_group_shuffle( long value, uint index ); -+ulong __ovld sub_group_shuffle( ulong value, uint index ); -+float __ovld sub_group_shuffle( float value, uint index ); -+ -+char __ovld sub_group_shuffle_xor( char value, uint mask ); -+uchar __ovld sub_group_shuffle_xor( uchar value, uint mask ); -+short __ovld sub_group_shuffle_xor( short value, uint mask ); -+ushort __ovld sub_group_shuffle_xor( ushort value, uint mask ); -+int __ovld sub_group_shuffle_xor( int value, uint mask ); -+uint __ovld sub_group_shuffle_xor( uint value, uint mask ); -+long __ovld sub_group_shuffle_xor( long value, uint mask ); -+ulong __ovld sub_group_shuffle_xor( ulong value, uint mask ); -+float __ovld sub_group_shuffle_xor( float value, uint mask ); -+ -+#if defined(cl_khr_fp16) -+half __ovld sub_group_shuffle( half value, uint index ); -+half __ovld sub_group_shuffle_xor( half value, uint mask ); -+#endif // cl_khr_fp16 -+ -+#if defined(cl_khr_fp64) -+double __ovld sub_group_shuffle( double value, uint index ); -+double __ovld sub_group_shuffle_xor( double value, uint mask ); -+#endif // cl_khr_fp64 -+ -+#endif // cl_khr_subgroup_shuffle -+ -+#if defined(cl_khr_subgroup_shuffle_relative) -+char __ovld sub_group_shuffle_up( char value, uint delta ); -+uchar __ovld sub_group_shuffle_up( uchar value, uint delta ); -+short __ovld sub_group_shuffle_up( short value, uint delta ); -+ushort __ovld sub_group_shuffle_up( ushort value, uint delta ); -+int __ovld sub_group_shuffle_up( int value, uint delta ); -+uint __ovld sub_group_shuffle_up( uint value, uint delta ); -+long __ovld sub_group_shuffle_up( long value, uint delta ); -+ulong __ovld sub_group_shuffle_up( ulong value, uint delta ); -+float __ovld sub_group_shuffle_up( float value, uint delta ); -+ -+char __ovld sub_group_shuffle_down( char value, uint delta ); -+uchar __ovld sub_group_shuffle_down( uchar value, uint delta ); -+short __ovld sub_group_shuffle_down( short value, uint delta ); -+ushort __ovld sub_group_shuffle_down( ushort value, uint delta ); -+int __ovld sub_group_shuffle_down( int value, uint delta ); -+uint __ovld sub_group_shuffle_down( uint value, uint delta ); -+long __ovld sub_group_shuffle_down( long value, uint delta ); -+ulong __ovld sub_group_shuffle_down( ulong value, uint delta ); -+float __ovld sub_group_shuffle_down( float value, uint delta ); -+ -+#if defined(cl_khr_fp16) -+half __ovld sub_group_shuffle_up( half value, uint delta ); -+half __ovld sub_group_shuffle_down( half value, uint delta ); -+#endif // cl_khr_fp16 -+ -+#if defined(cl_khr_fp64) -+double __ovld sub_group_shuffle_up( double value, uint delta ); -+double __ovld sub_group_shuffle_down( double value, uint delta ); -+#endif // cl_khr_fp64 -+ -+#endif // cl_khr_subgroup_shuffle_relative -+ -+#if defined(cl_khr_subgroup_clustered_reduce) -+char __ovld sub_group_clustered_reduce_add( char value, uint clustersize ); -+uchar __ovld sub_group_clustered_reduce_add( uchar value, uint clustersize ); -+short __ovld sub_group_clustered_reduce_add( short value, uint clustersize ); -+ushort __ovld sub_group_clustered_reduce_add( ushort value, uint clustersize ); -+int __ovld sub_group_clustered_reduce_add( int value, uint clustersize ); -+uint __ovld sub_group_clustered_reduce_add( uint value, uint clustersize ); -+long __ovld sub_group_clustered_reduce_add( long value, uint clustersize ); -+ulong __ovld sub_group_clustered_reduce_add( ulong value, uint clustersize ); -+float __ovld sub_group_clustered_reduce_add( float value, uint clustersize ); -+ -+char __ovld sub_group_clustered_reduce_mul( char value, uint clustersize ); -+uchar __ovld sub_group_clustered_reduce_mul( uchar value, uint clustersize ); -+short __ovld sub_group_clustered_reduce_mul( short value, uint clustersize ); -+ushort __ovld sub_group_clustered_reduce_mul( ushort value, uint clustersize ); -+int __ovld sub_group_clustered_reduce_mul( int value, uint clustersize ); -+uint __ovld sub_group_clustered_reduce_mul( uint value, uint clustersize ); -+long __ovld sub_group_clustered_reduce_mul( long value, uint clustersize ); -+ulong __ovld sub_group_clustered_reduce_mul( ulong value, uint clustersize ); -+float __ovld sub_group_clustered_reduce_mul( float value, uint clustersize ); -+ -+char __ovld sub_group_clustered_reduce_min( char value, uint clustersize ); -+uchar __ovld sub_group_clustered_reduce_min( uchar value, uint clustersize ); -+short __ovld sub_group_clustered_reduce_min( short value, uint clustersize ); -+ushort __ovld sub_group_clustered_reduce_min( ushort value, uint clustersize ); -+int __ovld sub_group_clustered_reduce_min( int value, uint clustersize ); -+uint __ovld sub_group_clustered_reduce_min( uint value, uint clustersize ); -+long __ovld sub_group_clustered_reduce_min( long value, uint clustersize ); -+ulong __ovld sub_group_clustered_reduce_min( ulong value, uint clustersize ); -+float __ovld sub_group_clustered_reduce_min( float value, uint clustersize ); -+ -+char __ovld sub_group_clustered_reduce_max( char value, uint clustersize ); -+uchar __ovld sub_group_clustered_reduce_max( uchar value, uint clustersize ); -+short __ovld sub_group_clustered_reduce_max( short value, uint clustersize ); -+ushort __ovld sub_group_clustered_reduce_max( ushort value, uint clustersize ); -+int __ovld sub_group_clustered_reduce_max( int value, uint clustersize ); -+uint __ovld sub_group_clustered_reduce_max( uint value, uint clustersize ); -+long __ovld sub_group_clustered_reduce_max( long value, uint clustersize ); -+ulong __ovld sub_group_clustered_reduce_max( ulong value, uint clustersize ); -+float __ovld sub_group_clustered_reduce_max( float value, uint clustersize ); -+ -+char __ovld sub_group_clustered_reduce_and( char value, uint clustersize ); -+uchar __ovld sub_group_clustered_reduce_and( uchar value, uint clustersize ); -+short __ovld sub_group_clustered_reduce_and( short value, uint clustersize ); -+ushort __ovld sub_group_clustered_reduce_and( ushort value, uint clustersize ); -+int __ovld sub_group_clustered_reduce_and( int value, uint clustersize ); -+uint __ovld sub_group_clustered_reduce_and( uint value, uint clustersize ); -+long __ovld sub_group_clustered_reduce_and( long value, uint clustersize ); -+ulong __ovld sub_group_clustered_reduce_and( ulong value, uint clustersize ); -+ -+char __ovld sub_group_clustered_reduce_or( char value, uint clustersize ); -+uchar __ovld sub_group_clustered_reduce_or( uchar value, uint clustersize ); -+short __ovld sub_group_clustered_reduce_or( short value, uint clustersize ); -+ushort __ovld sub_group_clustered_reduce_or( ushort value, uint clustersize ); -+int __ovld sub_group_clustered_reduce_or( int value, uint clustersize ); -+uint __ovld sub_group_clustered_reduce_or( uint value, uint clustersize ); -+long __ovld sub_group_clustered_reduce_or( long value, uint clustersize ); -+ulong __ovld sub_group_clustered_reduce_or( ulong value, uint clustersize ); -+ -+char __ovld sub_group_clustered_reduce_xor( char value, uint clustersize ); -+uchar __ovld sub_group_clustered_reduce_xor( uchar value, uint clustersize ); -+short __ovld sub_group_clustered_reduce_xor( short value, uint clustersize ); -+ushort __ovld sub_group_clustered_reduce_xor( ushort value, uint clustersize ); -+int __ovld sub_group_clustered_reduce_xor( int value, uint clustersize ); -+uint __ovld sub_group_clustered_reduce_xor( uint value, uint clustersize ); -+long __ovld sub_group_clustered_reduce_xor( long value, uint clustersize ); -+ulong __ovld sub_group_clustered_reduce_xor( ulong value, uint clustersize ); -+ -+int __ovld sub_group_clustered_reduce_logical_and( int predicate, uint clustersize ); -+int __ovld sub_group_clustered_reduce_logical_or( int predicate, uint clustersize ); -+int __ovld sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize ); -+ -+#if defined(cl_khr_fp16) -+half __ovld sub_group_clustered_reduce_add( half value, uint clustersize ); -+half __ovld sub_group_clustered_reduce_mul( half value, uint clustersize ); -+half __ovld sub_group_clustered_reduce_min( half value, uint clustersize ); -+half __ovld sub_group_clustered_reduce_max( half value, uint clustersize ); -+#endif // cl_khr_fp16 -+ -+#if defined(cl_khr_fp64) -+double __ovld sub_group_clustered_reduce_add( double value, uint clustersize ); -+double __ovld sub_group_clustered_reduce_mul( double value, uint clustersize ); -+double __ovld sub_group_clustered_reduce_min( double value, uint clustersize ); -+double __ovld sub_group_clustered_reduce_max( double value, uint clustersize ); -+#endif // cl_khr_fp64 -+ -+#endif // cl_khr_subgroup_clustered_reduce -+ - #if defined(cl_intel_subgroups) - // Intel-Specific Sub Group Functions - float __ovld __conv intel_sub_group_shuffle( float x, uint c ); -diff --git a/clang/test/SemaOpenCL/extension-version.cl b/clang/test/SemaOpenCL/extension-version.cl -index 0e6bbb7d3bcd..86c78143a0eb 100644 ---- a/clang/test/SemaOpenCL/extension-version.cl -+++ b/clang/test/SemaOpenCL/extension-version.cl -@@ -333,3 +333,86 @@ - #endif - #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : enable - -+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -+#ifndef cl_khr_subgroup_extended_types -+#error "Missing cl_khr_subgroup_extended_types" -+#endif -+#else -+#ifdef cl_khr_subgroup_extended_types -+#error "Incorrect cl_khr_subgroup_extended_types define" -+#endif -+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_extended_types' - ignoring}} -+#endif -+#pragma OPENCL EXTENSION cl_khr_subgroup_extended_types : enable -+ -+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -+#ifndef cl_khr_subgroup_non_uniform_vote -+#error "Missing cl_khr_subgroup_non_uniform_vote" -+#endif -+#else -+#ifdef cl_khr_subgroup_non_uniform_vote -+#error "Incorrect cl_khr_subgroup_non_uniform_vote define" -+#endif -+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_vote' - ignoring}} -+#endif -+#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable -+ -+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -+#ifndef cl_khr_subgroup_ballot -+#error "Missing cl_khr_subgroup_ballot" -+#endif -+#else -+#ifdef cl_khr_subgroup_ballot -+#error "Incorrect cl_khr_subgroup_ballot define" -+#endif -+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_ballot' - ignoring}} -+#endif -+#pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable -+ -+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -+#ifndef cl_khr_subgroup_non_uniform_arithmetic -+#error "Missing cl_khr_subgroup_non_uniform_arithmetic" -+#endif -+#else -+#ifdef cl_khr_subgroup_non_uniform_arithmetic -+#error "Incorrect cl_khr_subgroup_non_uniform_arithmetic define" -+#endif -+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_arithmetic' - ignoring}} -+#endif -+#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable -+ -+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -+#ifndef cl_khr_subgroup_shuffle -+#error "Missing cl_khr_subgroup_shuffle" -+#endif -+#else -+#ifdef cl_khr_subgroup_shuffle -+#error "Incorrect cl_khr_subgroup_shuffle define" -+#endif -+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle' - ignoring}} -+#endif -+#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable -+ -+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -+#ifndef cl_khr_subgroup_shuffle_relative -+#error "Missing cl_khr_subgroup_shuffle_relative" -+#endif -+#else -+#ifdef cl_khr_subgroup_shuffle_relative -+#error "Incorrect cl_khr_subgroup_shuffle_relative define" -+#endif -+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle_relative' - ignoring}} -+#endif -+#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable -+ -+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -+#ifndef cl_khr_subgroup_clustered_reduce -+#error "Missing cl_khr_subgroup_clustered_reduce" -+#endif -+#else -+#ifdef cl_khr_subgroup_clustered_reduce -+#error "Incorrect cl_khr_subgroup_clustered_reduce define" -+#endif -+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_clustered_reduce' - ignoring}} -+#endif -+#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch deleted file mode 100644 index 1aff65e7..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 331e323ae2633a8999a660314022491d670c442c Mon Sep 17 00:00:00 2001 -From: Andrea Bocci -Date: Sun, 15 Mar 2020 17:35:44 +0100 -Subject: [PATCH 2/3] Fix building in-tree with cmake -DLLVM_LINK_LLVM_DYLIB=ON - -Building in-tree with LLVM 11.0 master with the LLVM_LINK_LLVM_DYLIB -cmake flag fails to link with the LLVMSPIRVLib library. - -Add an explicit dependency to force the correct build order and linking. - -Signed-off-by: Andrea Bocci -Upstream-Status: Backport -Signed-off-by: Anuj Mittal ---- - tools/llvm-spirv/CMakeLists.txt | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/llvm-spirv/CMakeLists.txt b/tools/llvm-spirv/CMakeLists.txt -index 9aa96d9c..501c0daf 100644 ---- a/tools/llvm-spirv/CMakeLists.txt -+++ b/tools/llvm-spirv/CMakeLists.txt -@@ -14,7 +14,7 @@ add_llvm_tool(llvm-spirv - NO_INSTALL_RPATH - ) - --if (LLVM_SPIRV_BUILD_EXTERNAL) -+if (LLVM_SPIRV_BUILD_EXTERNAL OR LLVM_LINK_LLVM_DYLIB) - target_link_libraries(llvm-spirv PRIVATE LLVMSPIRVLib) - endif() - --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch deleted file mode 100644 index 49edd7e1..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch +++ /dev/null @@ -1,982 +0,0 @@ -From fbc9996d6490a5d4720b85b47f38335e7fdc99d9 Mon Sep 17 00:00:00 2001 -From: haonanya -Date: Mon, 19 Jul 2021 10:14:20 +0800 -Subject: [PATCH 3/3] Add support for cl_ext_float_atomics in SPIRVWriter - -Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/spirv/0001-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch] - -Signed-off-by: haonanya -Signed-off-by: Naveen Saini ---- - lib/SPIRV/OCL20ToSPIRV.cpp | 79 ++++++++++++++++-- - lib/SPIRV/SPIRVToOCL.h | 3 + - lib/SPIRV/SPIRVToOCL12.cpp | 21 +++++ - lib/SPIRV/SPIRVToOCL20.cpp | 28 ++++++- - lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 1 - - lib/SPIRV/libSPIRV/SPIRVOpCode.h | 8 +- - test/AtomicFAddEXTForOCL.ll | 64 +++++++++++++++ - test/AtomicFAddExt.ll | 111 ++++++++----------------- - test/AtomicFMaxEXT.ll | 113 +++++++------------------- - test/AtomicFMaxEXTForOCL.ll | 64 +++++++++++++++ - test/AtomicFMinEXT.ll | 113 +++++++------------------- - test/AtomicFMinEXTForOCL.ll | 64 +++++++++++++++ - test/InvalidAtomicBuiltins.cl | 8 -- - 13 files changed, 417 insertions(+), 260 deletions(-) - create mode 100644 test/AtomicFAddEXTForOCL.ll - create mode 100644 test/AtomicFMaxEXTForOCL.ll - create mode 100644 test/AtomicFMinEXTForOCL.ll - -diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp -index e30aa5be..b676a009 100644 ---- a/lib/SPIRV/OCL20ToSPIRV.cpp -+++ b/lib/SPIRV/OCL20ToSPIRV.cpp -@@ -408,10 +408,63 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) { - if (DemangledName.find(kOCLBuiltinName::AtomicPrefix) == 0 || - DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0) { - -- // Compute atomic builtins do not support floating types. -- if (CI.getType()->isFloatingPointTy() && -- isComputeAtomicOCLBuiltin(DemangledName)) -- return; -+ // Compute "atom" prefixed builtins do not support floating types. -+ if (CI.getType()->isFloatingPointTy()) { -+ if (DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0) -+ return; -+ // handle functions which are "atomic_" prefixed. -+ StringRef Stem = DemangledName; -+ Stem = Stem.drop_front(strlen("atomic_")); -+ // FP-typed atomic_{add, sub, inc, dec, exchange, min, max, or, and, xor, -+ // fetch_or, fetch_xor, fetch_and, fetch_or_explicit, fetch_xor_explicit, -+ // fetch_and_explicit} should be identified as function call -+ bool IsFunctionCall = llvm::StringSwitch(Stem) -+ .Case("add", true) -+ .Case("sub", true) -+ .Case("inc", true) -+ .Case("dec", true) -+ .Case("cmpxchg", true) -+ .Case("min", true) -+ .Case("max", true) -+ .Case("or", true) -+ .Case("xor", true) -+ .Case("and", true) -+ .Case("fetch_or", true) -+ .Case("fetch_and", true) -+ .Case("fetch_xor", true) -+ .Case("fetch_or_explicit", true) -+ .Case("fetch_xor_explicit", true) -+ .Case("fetch_and_explicit", true) -+ .Default(false); -+ if (IsFunctionCall) -+ return; -+ if (F->arg_size() != 2) { -+ IsFunctionCall = llvm::StringSwitch(Stem) -+ .Case("exchange", true) -+ .Case("fetch_add", true) -+ .Case("fetch_sub", true) -+ .Case("fetch_min", true) -+ .Case("fetch_max", true) -+ .Case("load", true) -+ .Case("store", true) -+ .Default(false); -+ if (IsFunctionCall) -+ return; -+ } -+ if (F->arg_size() != 3 && F->arg_size() != 4) { -+ IsFunctionCall = llvm::StringSwitch(Stem) -+ .Case("exchange_explicit", true) -+ .Case("fetch_add_explicit", true) -+ .Case("fetch_sub_explicit", true) -+ .Case("fetch_min_explicit", true) -+ .Case("fetch_max_explicit", true) -+ .Case("load_explicit", true) -+ .Case("store_explicit", true) -+ .Default(false); -+ if (IsFunctionCall) -+ return; -+ } -+ } - - auto PCI = &CI; - if (DemangledName == kOCLBuiltinName::AtomicInit) { -@@ -819,7 +872,7 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { - AttributeList Attrs = CI->getCalledFunction()->getAttributes(); - mutateCallInstSPIRV( - M, CI, -- [=](CallInst *CI, std::vector &Args) { -+ [=](CallInst *CI, std::vector &Args) -> std::string { - Info.PostProc(Args); - // Order of args in OCL20: - // object, 0-2 other args, 1-2 order, scope -@@ -864,7 +917,21 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { - std::rotate(Args.begin() + 2, Args.begin() + OrderIdx, - Args.end() - Offset); - } -- return getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName)); -+ llvm::Type* AtomicBuiltinsReturnType = -+ CI->getCalledFunction()->getReturnType(); -+ auto IsFPType = [](llvm::Type *ReturnType) { -+ return ReturnType->isHalfTy() || ReturnType->isFloatTy() || -+ ReturnType->isDoubleTy(); -+ }; -+ auto SPIRVFunctionName = -+ getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName)); -+ if (!IsFPType(AtomicBuiltinsReturnType)) -+ return SPIRVFunctionName; -+ // Translate FP-typed atomic builtins. -+ return llvm::StringSwitch(SPIRVFunctionName) -+ .Case("__spirv_AtomicIAdd", "__spirv_AtomicFAddEXT") -+ .Case("__spirv_AtomicSMax", "__spirv_AtomicFMaxEXT") -+ .Case("__spirv_AtomicSMin", "__spirv_AtomicFMinEXT"); - }, - &Attrs); - } -diff --git a/lib/SPIRV/SPIRVToOCL.h b/lib/SPIRV/SPIRVToOCL.h -index ddeec0b6..006fb0b1 100644 ---- a/lib/SPIRV/SPIRVToOCL.h -+++ b/lib/SPIRV/SPIRVToOCL.h -@@ -178,6 +178,9 @@ public: - /// using separate maps for OpenCL 1.2 and OpenCL 2.0 - virtual Instruction *mutateAtomicName(CallInst *CI, Op OC) = 0; - -+ // Transform FP atomic opcode to corresponding OpenCL function name -+ virtual std::string mapFPAtomicName(Op OC) = 0; -+ - private: - /// Transform uniform group opcode to corresponding OpenCL function name, - /// example: GroupIAdd(Reduce) => group_iadd => work_group_reduce_add | -diff --git a/lib/SPIRV/SPIRVToOCL12.cpp b/lib/SPIRV/SPIRVToOCL12.cpp -index afddd596..d7f00de3 100644 ---- a/lib/SPIRV/SPIRVToOCL12.cpp -+++ b/lib/SPIRV/SPIRVToOCL12.cpp -@@ -104,6 +104,9 @@ public: - /// cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions. - std::string mapAtomicName(Op OC, Type *Ty); - -+ // Transform FP atomic opcode to corresponding OpenCL function name -+ std::string mapFPAtomicName(Op OC) override; -+ - static char ID; - }; - -@@ -338,6 +341,21 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicBuiltin(CallInst *CI, Op OC) { - return NewCI; - } - -+std::string SPIRVToOCL12::mapFPAtomicName(Op OC) { -+ assert(isFPAtomicOpCode(OC) && "Not intended to handle other opcodes than " -+ "AtomicF{Add/Min/Max}EXT!"); -+ switch (OC) { -+ case OpAtomicFAddEXT: -+ return "atomic_add"; -+ case OpAtomicFMinEXT: -+ return "atomic_min"; -+ case OpAtomicFMaxEXT: -+ return "atomic_max"; -+ default: -+ llvm_unreachable("Unsupported opcode!"); -+ } -+} -+ - Instruction *SPIRVToOCL12::mutateAtomicName(CallInst *CI, Op OC) { - AttributeList Attrs = CI->getCalledFunction()->getAttributes(); - return mutateCallInstOCL( -@@ -351,6 +369,9 @@ Instruction *SPIRVToOCL12::mutateAtomicName(CallInst *CI, Op OC) { - std::string SPIRVToOCL12::mapAtomicName(Op OC, Type *Ty) { - std::string Prefix = Ty->isIntegerTy(64) ? kOCLBuiltinName::AtomPrefix - : kOCLBuiltinName::AtomicPrefix; -+ // Map fp atomic instructions to regular OpenCL built-ins. -+ if (isFPAtomicOpCode(OC)) -+ return mapFPAtomicName(OC); - return Prefix += OCL12SPIRVBuiltinMap::rmap(OC); - } - -diff --git a/lib/SPIRV/SPIRVToOCL20.cpp b/lib/SPIRV/SPIRVToOCL20.cpp -index d829ff42..01d088e9 100644 ---- a/lib/SPIRV/SPIRVToOCL20.cpp -+++ b/lib/SPIRV/SPIRVToOCL20.cpp -@@ -82,6 +82,9 @@ public: - /// compare_exchange_strong/weak_explicit - Instruction *visitCallSPIRVAtomicCmpExchg(CallInst *CI, Op OC) override; - -+ // Transform FP atomic opcode to corresponding OpenCL function name -+ std::string mapFPAtomicName(Op OC) override; -+ - static char ID; - }; - -@@ -144,11 +147,29 @@ void SPIRVToOCL20::visitCallSPIRVControlBarrier(CallInst *CI) { - &Attrs); - } - -+std::string SPIRVToOCL20::mapFPAtomicName(Op OC) { -+ assert(isFPAtomicOpCode(OC) && "Not intended to handle other opcodes than " -+ "AtomicF{Add/Min/Max}EXT!"); -+ switch (OC) { -+ case OpAtomicFAddEXT: -+ return "atomic_fetch_add_explicit"; -+ case OpAtomicFMinEXT: -+ return "atomic_fetch_min_explicit"; -+ case OpAtomicFMaxEXT: -+ return "atomic_fetch_max_explicit"; -+ default: -+ llvm_unreachable("Unsupported opcode!"); -+ } -+} -+ - Instruction *SPIRVToOCL20::mutateAtomicName(CallInst *CI, Op OC) { - AttributeList Attrs = CI->getCalledFunction()->getAttributes(); - return mutateCallInstOCL( - M, CI, - [=](CallInst *, std::vector &Args) { -+ // Map fp atomic instructions to regular OpenCL built-ins. -+ if (isFPAtomicOpCode(OC)) -+ return mapFPAtomicName(OC); - return OCLSPIRVBuiltinMap::rmap(OC); - }, - &Attrs); -@@ -215,7 +236,12 @@ CallInst *SPIRVToOCL20::mutateCommonAtomicArguments(CallInst *CI, Op OC) { - } - } - auto Ptr = findFirstPtr(Args); -- auto Name = OCLSPIRVBuiltinMap::rmap(OC); -+ std::string Name; -+ // Map fp atomic instructions to regular OpenCL built-ins. -+ if (isFPAtomicOpCode(OC)) -+ Name = mapFPAtomicName(OC); -+ else -+ Name = OCLSPIRVBuiltinMap::rmap(OC); - auto NumOrder = getSPIRVAtomicBuiltinNumMemoryOrderArgs(OC); - auto ScopeIdx = Ptr + 1; - auto OrderIdx = Ptr + 2; -diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h -index 13f93fbe..7b707993 100644 ---- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h -+++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h -@@ -521,7 +521,6 @@ template <> inline void SPIRVMap::init() { - add(CapabilityAtomicFloat64AddEXT, "AtomicFloat64AddEXT"); - add(CapabilityAtomicFloat32MinMaxEXT, "AtomicFloat32MinMaxEXT"); - add(CapabilityAtomicFloat64MinMaxEXT, "AtomicFloat64MinMaxEXT"); -- add(CapabilityAtomicFloat16MinMaxEXT, "AtomicFloat16MinMaxEXT"); - add(CapabilitySubgroupShuffleINTEL, "SubgroupShuffleINTEL"); - add(CapabilitySubgroupBufferBlockIOINTEL, "SubgroupBufferBlockIOINTEL"); - add(CapabilitySubgroupImageBlockIOINTEL, "SubgroupImageBlockIOINTEL"); -diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h -index feec70f6..8e595e83 100644 ---- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h -+++ b/lib/SPIRV/libSPIRV/SPIRVOpCode.h -@@ -54,11 +54,17 @@ template <> inline void SPIRVMap::init() { - } - SPIRV_DEF_NAMEMAP(Op, OpCodeNameMap) - -+inline bool isFPAtomicOpCode(Op OpCode) { -+ return OpCode == OpAtomicFAddEXT || OpCode == OpAtomicFMinEXT || -+ OpCode == OpAtomicFMaxEXT; -+} -+ - inline bool isAtomicOpCode(Op OpCode) { - static_assert(OpAtomicLoad < OpAtomicXor, ""); - return ((unsigned)OpCode >= OpAtomicLoad && - (unsigned)OpCode <= OpAtomicXor) || -- OpCode == OpAtomicFlagTestAndSet || OpCode == OpAtomicFlagClear; -+ OpCode == OpAtomicFlagTestAndSet || OpCode == OpAtomicFlagClear || -+ isFPAtomicOpCode(OpCode); - } - inline bool isBinaryOpCode(Op OpCode) { - return ((unsigned)OpCode >= OpIAdd && (unsigned)OpCode <= OpFMod) || -diff --git a/test/AtomicFAddEXTForOCL.ll b/test/AtomicFAddEXTForOCL.ll -new file mode 100644 -index 00000000..fb146fb9 ---- /dev/null -+++ b/test/AtomicFAddEXTForOCL.ll -@@ -0,0 +1,64 @@ -+; RUN: llvm-as %s -o %t.bc -+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_add -o %t.spv -+; RUN: spirv-val %t.spv -+; RUN: llvm-spirv -to-text %t.spv -o %t.spt -+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV -+ -+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20 -+ -+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV -+ -+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" -+target triple = "spir-unknown-unknown" -+ -+; CHECK-SPIRV: Capability AtomicFloat32AddEXT -+; CHECK-SPIRV: Capability AtomicFloat64AddEXT -+; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_add" -+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32 -+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64 -+ -+ -+; Function Attrs: convergent norecurse nounwind -+define dso_local spir_func void @test_atomic_float(float addrspace(1)* %a) local_unnamed_addr #0 { -+entry: -+ ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_32]] -+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}}) -+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+fiif]]({{.*}}) -+ %call = tail call spir_func float @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2 -+ ret void -+} -+ -+; Function Attrs: convergent -+declare spir_func float @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1 -+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float -+ -+; Function Attrs: convergent norecurse nounwind -+define dso_local spir_func void @test_atomic_double(double addrspace(1)* %a) local_unnamed_addr #0 { -+entry: -+ ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_64]] -+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}}) -+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+diid]]({{.*}}) -+ %call = tail call spir_func double @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2 -+ ret void -+} -+; Function Attrs: convergent -+declare spir_func double @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1 -+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double -+ -+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float -+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double -+ -+attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -+attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -+attributes #2 = { convergent nounwind } -+ -+!llvm.module.flags = !{!0} -+!opencl.ocl.version = !{!1} -+!opencl.spir.version = !{!1} -+!llvm.ident = !{!2} -+ -+!0 = !{i32 1, !"wchar_size", i32 4} -+!1 = !{i32 2, i32 0} -+!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"} -diff --git a/test/AtomicFAddExt.ll b/test/AtomicFAddExt.ll -index 011dd8a7..42bdfeea 100644 ---- a/test/AtomicFAddExt.ll -+++ b/test/AtomicFAddExt.ll -@@ -4,20 +4,16 @@ - ; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV - - ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc --; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12 - --target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" --target triple = "spir64-unknown-unknown-sycldevice" -- --%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } --%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } --%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } -- --$_ZTSZZ3addIfEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37 = comdat any -+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20 - --$_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37 = comdat any -+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV - --@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 -+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -+target triple = "spir64-unknown-unknown-sycldevice" - - ; CHECK-SPIRV: Capability AtomicFloat32AddEXT - ; CHECK-SPIRV: Capability AtomicFloat64AddEXT -@@ -25,62 +21,43 @@ $_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_3 - ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32 - ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64 - --; Function Attrs: convergent norecurse mustprogress --define weak_odr dso_local spir_kernel void @_ZTSZZ3addIfEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { -+; Function Attrs: convergent norecurse nounwind -+define dso_local spir_func float @_Z14AtomicFloatIncRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 { - entry: -- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 -- %1 = load i64, i64* %0, align 8 -- %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1 -- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 -- %3 = load i64, i64* %2, align 8 -- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3 -- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5 -- %5 = extractelement <3 x i64> %4, i64 0 -+ %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)* - ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_32]] -- ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+]]({{.*}}) -- %call3.i.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float 1.000000e+00) #2 -- %add.i.i = fadd float %call3.i.i.i.i, 1.000000e+00 -- %sext.i = shl i64 %5, 32 -- %conv5.i = ashr exact i64 %sext.i, 32 -- %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv5.i -- %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)* -- store float %add.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14 -- ret void -+ ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_add[[:alnum:]]+ff]]({{.*}}) -+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}}) -+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+fiif]]({{.*}}) -+ %call3.i.i = tail call spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2 -+ ret float %call3.i.i - } - - ; Function Attrs: convergent --; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float) - declare dso_local spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1 -+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float - --; Function Attrs: convergent norecurse mustprogress --define weak_odr dso_local spir_kernel void @_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { -+; Function Attrs: convergent norecurse nounwind -+define dso_local spir_func double @_Z15AtomicDoubleIncRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 { - entry: -- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 -- %1 = load i64, i64* %0, align 8 -- %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1 -- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 -- %3 = load i64, i64* %2, align 8 -- %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3 -- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18 -- %5 = extractelement <3 x i64> %4, i64 0 -+ %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)* - ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_64]] -- ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+]]({{.*}}) -- %call3.i.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double 1.000000e+00) #2 -- %add.i.i = fadd double %call3.i.i.i.i, 1.000000e+00 -- %sext.i = shl i64 %5, 32 -- %conv5.i = ashr exact i64 %sext.i, 32 -- %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv5.i -- %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)* -- store double %add.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27 -- ret void -+ ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_add[[:alnum:]]+dd]]({{.*}}) -+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}}) -+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+diid]]({{.*}}) -+ %call3.i.i = tail call spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2 -+ ret double %call3.i.i - } - - ; Function Attrs: convergent --; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double) - declare dso_local spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1 -+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double - --attributes #0 = { convergent norecurse } --attributes #1 = { convergent } -+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float -+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double -+ -+attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -+attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #2 = { convergent nounwind } - - !llvm.module.flags = !{!0} -@@ -91,29 +68,5 @@ attributes #2 = { convergent nounwind } - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 1, i32 2} - !2 = !{i32 4, i32 100000} --!3 = !{!"clang version 12.0.0"} --!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} --!5 = !{!6, !8, !10, !12} --!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} --!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} --!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} --!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} --!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} --!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} --!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} --!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} --!14 = !{!15, !15, i64 0} --!15 = !{!"float", !16, i64 0} --!16 = !{!"omnipotent char", !17, i64 0} --!17 = !{!"Simple C++ TBAA"} --!18 = !{!19, !21, !23, !25} --!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} --!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} --!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} --!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} --!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} --!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} --!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} --!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} --!27 = !{!28, !28, i64 0} --!28 = !{!"double", !16, i64 0} -+!3 = !{!"clang version 13.0.0"} -+ -diff --git a/test/AtomicFMaxEXT.ll b/test/AtomicFMaxEXT.ll -index 1b81e53b..1c2eec93 100644 ---- a/test/AtomicFMaxEXT.ll -+++ b/test/AtomicFMaxEXT.ll -@@ -4,20 +4,16 @@ - ; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV - - ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc --; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12 - --target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" --target triple = "spir64-unknown-unknown-sycldevice" -- --%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } --%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } --%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } -- --$_ZTSZZ8max_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any -+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20 - --$_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any -+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV - --@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 -+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -+target triple = "spir64-unknown-unknown-sycldevice" - - ; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT - ; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT -@@ -25,65 +21,42 @@ $_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4item - ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32 - ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64 - --; Function Attrs: convergent norecurse --define weak_odr dso_local spir_kernel void @_ZTSZZ8max_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { -+; Function Attrs: convergent norecurse nounwind -+define dso_local spir_func float @_Z14AtomicFloatMaxRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 { - entry: -- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 -- %1 = load i64, i64* %0, align 8 -- %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1 -- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 -- %3 = load i64, i64* %2, align 8 -- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3 -- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5 -- %5 = extractelement <3 x i64> %4, i64 0 -- %conv.i = trunc i64 %5 to i32 -- %conv3.i = sitofp i32 %conv.i to float -- %add.i = fadd float %conv3.i, 1.000000e+00 -+ %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)* - ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_32]] -- ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+]]({{.*}}) -- %call3.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float %add.i) #2 -- %sext.i = shl i64 %5, 32 -- %conv6.i = ashr exact i64 %sext.i, 32 -- %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv6.i -- %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)* -- store float %call3.i.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14 -- ret void -+ ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_max[[:alnum:]]+ff]]({{.*}}) -+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}}) -+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+fiif]]({{.*}}) -+ %call.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2 -+ ret float %call.i.i.i - } - - ; Function Attrs: convergent --; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float) - declare dso_local spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1 -+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float - --; Function Attrs: convergent norecurse --define weak_odr dso_local spir_kernel void @_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { -+; Function Attrs: convergent norecurse nounwind -+define dso_local spir_func double @_Z15AtomicDoubleMaxRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 { - entry: -- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 -- %1 = load i64, i64* %0, align 8 -- %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1 -- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 -- %3 = load i64, i64* %2, align 8 -- %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3 -- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18 -- %5 = extractelement <3 x i64> %4, i64 0 -- %conv.i = trunc i64 %5 to i32 -- %conv3.i = sitofp i32 %conv.i to double -- %add.i = fadd double %conv3.i, 1.000000e+00 -+ %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)* - ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_64]] -- ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+]]({{.*}}) -- %call3.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double %add.i) #2 -- %sext.i = shl i64 %5, 32 -- %conv6.i = ashr exact i64 %sext.i, 32 -- %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv6.i -- %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)* -- store double %call3.i.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27 -- ret void -+ ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_max[[:alnum:]]+dd]]({{.*}}) -+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}}) -+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+diid]]({{.*}}) -+ %call.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2 -+ ret double %call.i.i.i - } - - ; Function Attrs: convergent --; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double) - declare dso_local spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1 -+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double - --attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float -+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double -+ -+attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #2 = { convergent nounwind } - -@@ -95,29 +68,5 @@ attributes #2 = { convergent nounwind } - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 1, i32 2} - !2 = !{i32 4, i32 100000} --!3 = !{!"clang version 12.0.0"} --!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} --!5 = !{!6, !8, !10, !12} --!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} --!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} --!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} --!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} --!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} --!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} --!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} --!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} --!14 = !{!15, !15, i64 0} --!15 = !{!"float", !16, i64 0} --!16 = !{!"omnipotent char", !17, i64 0} --!17 = !{!"Simple C++ TBAA"} --!18 = !{!19, !21, !23, !25} --!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} --!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} --!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} --!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} --!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} --!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} --!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} --!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} --!27 = !{!28, !28, i64 0} --!28 = !{!"double", !16, i64 0} -+!3 = !{!"clang version 13.0.0"} -+ -diff --git a/test/AtomicFMaxEXTForOCL.ll b/test/AtomicFMaxEXTForOCL.ll -new file mode 100644 -index 00000000..1f2530d9 ---- /dev/null -+++ b/test/AtomicFMaxEXTForOCL.ll -@@ -0,0 +1,64 @@ -+; RUN: llvm-as %s -o %t.bc -+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_min_max -o %t.spv -+; RUN: spirv-val %t.spv -+; RUN: llvm-spirv -to-text %t.spv -o %t.spt -+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV -+ -+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20 -+ -+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV -+ -+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" -+target triple = "spir-unknown-unknown" -+ -+; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT -+; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT -+; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_min_max" -+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32 -+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64 -+ -+; Function Attrs: convergent norecurse nounwind -+define dso_local spir_func void @test_float(float addrspace(1)* %a) local_unnamed_addr #0 { -+entry: -+ ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_32]] -+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}}) -+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+fiif]]({{.*}}) -+ %call = tail call spir_func float @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2 -+ ret void -+} -+ -+; Function Attrs: convergent -+declare spir_func float @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1 -+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float -+ -+; Function Attrs: convergent norecurse nounwind -+define dso_local spir_func void @test_double(double addrspace(1)* %a) local_unnamed_addr #0 { -+entry: -+ ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_64]] -+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}}) -+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+diid]]({{.*}}) -+ %call = tail call spir_func double @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2 -+ ret void -+} -+ -+; Function Attrs: convergent -+declare spir_func double @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1 -+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double -+ -+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float -+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double -+ -+attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -+attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -+attributes #2 = { convergent nounwind } -+ -+!llvm.module.flags = !{!0} -+!opencl.ocl.version = !{!1} -+!opencl.spir.version = !{!1} -+!llvm.ident = !{!2} -+ -+!0 = !{i32 1, !"wchar_size", i32 4} -+!1 = !{i32 2, i32 0} -+!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"} -diff --git a/test/AtomicFMinEXT.ll b/test/AtomicFMinEXT.ll -index 98c98b8e..9e40a669 100644 ---- a/test/AtomicFMinEXT.ll -+++ b/test/AtomicFMinEXT.ll -@@ -4,20 +4,16 @@ - ; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV - - ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc --; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12 - --target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" --target triple = "spir64-unknown-unknown-sycldevice" -- --%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } --%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } --%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } -- --$_ZTSZZ8min_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any -+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20 - --$_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any -+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV - --@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 -+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -+target triple = "spir64-unknown-unknown-sycldevice" - - ; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT - ; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT -@@ -25,65 +21,42 @@ $_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4item - ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32 - ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64 - --; Function Attrs: convergent norecurse --define weak_odr dso_local spir_kernel void @_ZTSZZ8min_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { -+; Function Attrs: convergent norecurse nounwind -+define dso_local spir_func float @_Z14AtomicFloatMinRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 { - entry: -- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 -- %1 = load i64, i64* %0, align 8 -- %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1 -- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 -- %3 = load i64, i64* %2, align 8 -- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3 -- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5 -- %5 = extractelement <3 x i64> %4, i64 0 -- %conv.i = trunc i64 %5 to i32 -- %conv3.i = sitofp i32 %conv.i to float -- %add.i = fadd float %conv3.i, 1.000000e+00 -+ %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)* - ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_32]] -- ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+]]({{.*}}) -- %call3.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float %add.i) #2 -- %sext.i = shl i64 %5, 32 -- %conv6.i = ashr exact i64 %sext.i, 32 -- %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv6.i -- %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)* -- store float %call3.i.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14 -- ret void -+ ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_min[[:alnum:]]+ff]]({{.*}}) -+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}}) -+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+fiif]]({{.*}}) -+ %call.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2 -+ ret float %call.i.i.i - } - - ; Function Attrs: convergent --; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float) - declare dso_local spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1 -+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float - --; Function Attrs: convergent norecurse --define weak_odr dso_local spir_kernel void @_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { -+; Function Attrs: convergent norecurse nounwind -+define dso_local spir_func double @_Z15AtomicDoubleMinRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 { - entry: -- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 -- %1 = load i64, i64* %0, align 8 -- %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1 -- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 -- %3 = load i64, i64* %2, align 8 -- %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3 -- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18 -- %5 = extractelement <3 x i64> %4, i64 0 -- %conv.i = trunc i64 %5 to i32 -- %conv3.i = sitofp i32 %conv.i to double -- %add.i = fadd double %conv3.i, 1.000000e+00 -+ %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)* - ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_64]] -- ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+]]({{.*}}) -- %call3.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double %add.i) #2 -- %sext.i = shl i64 %5, 32 -- %conv6.i = ashr exact i64 %sext.i, 32 -- %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv6.i -- %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)* -- store double %call3.i.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27 -- ret void -+ ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_min[[:alnum:]]+dd]]({{.*}}) -+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}}) -+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+diid]]({{.*}}) -+ %call.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2 -+ ret double %call.i.i.i - } - - ; Function Attrs: convergent --; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double) - declare dso_local spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1 -+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double - --attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float -+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double -+ -+attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #2 = { convergent nounwind } - -@@ -95,29 +68,5 @@ attributes #2 = { convergent nounwind } - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 1, i32 2} - !2 = !{i32 4, i32 100000} --!3 = !{!"clang version 12.0.0 (https://github.com/otcshare/llvm.git 67add71766d55d6a8d8d894822f583d6365a3b7d)"} --!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} --!5 = !{!6, !8, !10, !12} --!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} --!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} --!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} --!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} --!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} --!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} --!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} --!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} --!14 = !{!15, !15, i64 0} --!15 = !{!"float", !16, i64 0} --!16 = !{!"omnipotent char", !17, i64 0} --!17 = !{!"Simple C++ TBAA"} --!18 = !{!19, !21, !23, !25} --!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} --!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} --!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} --!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} --!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} --!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} --!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} --!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} --!27 = !{!28, !28, i64 0} --!28 = !{!"double", !16, i64 0} -+!3 = !{!"clang version 13.0.0"} -+ -diff --git a/test/AtomicFMinEXTForOCL.ll b/test/AtomicFMinEXTForOCL.ll -new file mode 100644 -index 00000000..6196b0f8 ---- /dev/null -+++ b/test/AtomicFMinEXTForOCL.ll -@@ -0,0 +1,64 @@ -+; RUN: llvm-as %s -o %t.bc -+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_min_max -o %t.spv -+; RUN: spirv-val %t.spv -+; RUN: llvm-spirv -to-text %t.spv -o %t.spt -+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV -+ -+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20 -+ -+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc -+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV -+ -+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" -+target triple = "spir-unknown-unknown" -+ -+; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT -+; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT -+; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_min_max" -+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32 -+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64 -+ -+; Function Attrs: convergent norecurse nounwind -+define dso_local spir_func void @test_float(float addrspace(1)* %a) local_unnamed_addr #0 { -+entry: -+ ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_32]] -+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}}) -+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+fiif]]({{.*}}) -+ %call = tail call spir_func float @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2 -+ ret void -+} -+ -+; Function Attrs: convergent -+declare spir_func float @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1 -+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float -+ -+; Function Attrs: convergent norecurse nounwind -+define dso_local spir_func void @test_double(double addrspace(1)* %a) local_unnamed_addr #0 { -+entry: -+ ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_64]] -+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}}) -+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+diid]]({{.*}}) -+ %call = tail call spir_func double @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2 -+ ret void -+} -+ -+; Function Attrs: convergent -+declare spir_func double @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1 -+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double -+ -+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float -+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double -+ -+attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -+attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -+attributes #2 = { convergent nounwind } -+ -+!llvm.module.flags = !{!0} -+!opencl.ocl.version = !{!1} -+!opencl.spir.version = !{!1} -+!llvm.ident = !{!2} -+ -+!0 = !{i32 1, !"wchar_size", i32 4} -+!1 = !{i32 2, i32 0} -+!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"} -diff --git a/test/InvalidAtomicBuiltins.cl b/test/InvalidAtomicBuiltins.cl -index b8ec5b89..2182f070 100644 ---- a/test/InvalidAtomicBuiltins.cl -+++ b/test/InvalidAtomicBuiltins.cl -@@ -41,13 +41,9 @@ float __attribute__((overloadable)) atomic_fetch_xor(volatile generic atomic_flo - double __attribute__((overloadable)) atomic_fetch_and(volatile generic atomic_double *object, double operand, memory_order order); - double __attribute__((overloadable)) atomic_fetch_max(volatile generic atomic_double *object, double operand, memory_order order); - double __attribute__((overloadable)) atomic_fetch_min(volatile generic atomic_double *object, double operand, memory_order order); --float __attribute__((overloadable)) atomic_fetch_add_explicit(volatile generic atomic_float *object, float operand, memory_order order); --float __attribute__((overloadable)) atomic_fetch_sub_explicit(volatile generic atomic_float *object, float operand, memory_order order); - float __attribute__((overloadable)) atomic_fetch_or_explicit(volatile generic atomic_float *object, float operand, memory_order order); - float __attribute__((overloadable)) atomic_fetch_xor_explicit(volatile generic atomic_float *object, float operand, memory_order order); - double __attribute__((overloadable)) atomic_fetch_and_explicit(volatile generic atomic_double *object, double operand, memory_order order); --double __attribute__((overloadable)) atomic_fetch_max_explicit(volatile generic atomic_double *object, double operand, memory_order order); --double __attribute__((overloadable)) atomic_fetch_min_explicit(volatile generic atomic_double *object, double operand, memory_order order); - - __kernel void test_atomic_fn(volatile __global float *p, - volatile __global double *pp, -@@ -86,11 +82,7 @@ __kernel void test_atomic_fn(volatile __global float *p, - d = atomic_fetch_and(pp, val, order); - d = atomic_fetch_min(pp, val, order); - d = atomic_fetch_max(pp, val, order); -- f = atomic_fetch_add_explicit(p, val, order); -- f = atomic_fetch_sub_explicit(p, val, order); - f = atomic_fetch_or_explicit(p, val, order); - f = atomic_fetch_xor_explicit(p, val, order); - d = atomic_fetch_and_explicit(pp, val, order); -- d = atomic_fetch_min_explicit(pp, val, order); -- d = atomic_fetch_max_explicit(pp, val, order); - } --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch deleted file mode 100644 index 3b035f47..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch +++ /dev/null @@ -1,35 +0,0 @@ -From cfb18b75e8a353bc7486f337541476a36994b063 Mon Sep 17 00:00:00 2001 -From: juanrod2 <> -Date: Tue, 22 Dec 2020 08:33:08 +0800 -Subject: [PATCH 3/7] Memory leak fix for Managed Static Mutex - -Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch] - -Signed-off-by: Naveen Saini - -Cleaning a mutex inside ManagedStatic llvm class. ---- - llvm/lib/Support/ManagedStatic.cpp | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/llvm/lib/Support/ManagedStatic.cpp b/llvm/lib/Support/ManagedStatic.cpp -index 053493f72fb5..6571580ccecf 100644 ---- a/llvm/lib/Support/ManagedStatic.cpp -+++ b/llvm/lib/Support/ManagedStatic.cpp -@@ -76,8 +76,12 @@ void ManagedStaticBase::destroy() const { - - /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. - void llvm::llvm_shutdown() { -- std::lock_guard Lock(*getManagedStaticMutex()); -+ getManagedStaticMutex()->lock(); - - while (StaticList) - StaticList->destroy(); -+ -+ getManagedStaticMutex()->unlock(); -+ delete ManagedStaticMutex; -+ ManagedStaticMutex = nullptr; - } --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0004-Remove-repo-name-in-LLVM-IR.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0004-Remove-repo-name-in-LLVM-IR.patch deleted file mode 100644 index f8dec996..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0004-Remove-repo-name-in-LLVM-IR.patch +++ /dev/null @@ -1,49 +0,0 @@ -From b794037bf1f90a93efa4c542855ad569cb13b4c5 Mon Sep 17 00:00:00 2001 -From: Feng Zou -Date: Mon, 19 Oct 2020 14:43:38 +0800 -Subject: [PATCH 4/7] Remove repo name in LLVM IR - -Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0003-Remove-repo-name-in-LLVM-IR.patch] -Signed-off-by: Feng Zou -Signed-off-by: Naveen Saini ---- - llvm/cmake/modules/VersionFromVCS.cmake | 23 ++++++++++++----------- - 1 file changed, 12 insertions(+), 11 deletions(-) - -diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake -index 1b6519b4b7c4..8fd6b23bb345 100644 ---- a/llvm/cmake/modules/VersionFromVCS.cmake -+++ b/llvm/cmake/modules/VersionFromVCS.cmake -@@ -33,17 +33,18 @@ function(get_source_info path revision repository) - else() - set(remote "origin") - endif() -- execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote} -- WORKING_DIRECTORY ${path} -- RESULT_VARIABLE git_result -- OUTPUT_VARIABLE git_output -- ERROR_QUIET) -- if(git_result EQUAL 0) -- string(STRIP "${git_output}" git_output) -- set(${repository} ${git_output} PARENT_SCOPE) -- else() -- set(${repository} ${path} PARENT_SCOPE) -- endif() -+ # Do not show repo name in IR -+ # execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote} -+ # WORKING_DIRECTORY ${path} -+ # RESULT_VARIABLE git_result -+ # OUTPUT_VARIABLE git_output -+ # ERROR_QUIET) -+ # if(git_result EQUAL 0) -+ # string(STRIP "${git_output}" git_output) -+ # set(${repository} ${git_output} PARENT_SCOPE) -+ # else() -+ # set(${repository} ${path} PARENT_SCOPE) -+ # endif() - endif() - endif() - endfunction() --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch deleted file mode 100644 index f8f177e5..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 3dd4766499d25e5978a5d90001f18e657e875da0 Mon Sep 17 00:00:00 2001 -From: haonanya -Date: Thu, 12 Aug 2021 15:48:34 +0800 -Subject: [PATCH 5/7] Remove __IMAGE_SUPPORT__ macro for SPIR since SPIR - doesn't require image support - -Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0003-Remove-__IMAGE_SUPPORT__-macro-for-SPIR.patch] - -Signed-off-by: haonanya -Signed-off-by: Naveen Saini ---- - clang/lib/Frontend/InitPreprocessor.cpp | 3 --- - clang/test/Preprocessor/predefined-macros.c | 4 ---- - 2 files changed, 7 deletions(-) - -diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp -index aefd208e6cd3..b4a84636673a 100644 ---- a/clang/lib/Frontend/InitPreprocessor.cpp -+++ b/clang/lib/Frontend/InitPreprocessor.cpp -@@ -1108,9 +1108,6 @@ static void InitializePredefinedMacros(const TargetInfo &TI, - if (TI.getSupportedOpenCLOpts().isSupported(#Ext)) \ - Builder.defineMacro(#Ext); - #include "clang/Basic/OpenCLExtensions.def" -- -- if (TI.getTriple().isSPIR()) -- Builder.defineMacro("__IMAGE_SUPPORT__"); - } - - if (TI.hasInt128Type() && LangOpts.CPlusPlus && LangOpts.GNUMode) { -diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c -index b088a37ba665..39a222d02faf 100644 ---- a/clang/test/Preprocessor/predefined-macros.c -+++ b/clang/test/Preprocessor/predefined-macros.c -@@ -184,10 +184,6 @@ - // MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1 - // MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0 - --// RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spir-unknown-unknown \ --// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIR --// CHECK-SPIR: #define __IMAGE_SUPPORT__ 1 -- - // RUN: %clang_cc1 %s -E -dM -o - -x hip -triple amdgcn-amd-amdhsa \ - // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-HIP - // CHECK-HIP-NOT: #define __CUDA_ARCH__ --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch deleted file mode 100644 index 0b4ee8c7..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 2c53abd0008bbecfcfe871c6060f4bbf1c94c74a Mon Sep 17 00:00:00 2001 -From: Raphael Isemann -Date: Thu, 1 Apr 2021 18:41:44 +0200 -Subject: [PATCH 6/7] Avoid calling ParseCommandLineOptions in BackendUtil if - possible - -Calling `ParseCommandLineOptions` should only be called from `main` as the -CommandLine setup code isn't thread-safe. As BackendUtil is part of the -generic Clang FrontendAction logic, a process which has several threads executing -Clang FrontendActions will randomly crash in the unsafe setup code. - -This patch avoids calling the function unless either the debug-pass option or -limit-float-precision option is set. Without these two options set the -`ParseCommandLineOptions` call doesn't do anything beside parsing -the command line `clang` which doesn't set any options. - -See also D99652 where LLDB received a workaround for this crash. - -Reviewed By: JDevlieghere - -Differential Revision: https://reviews.llvm.org/D99740 - -Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0004-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch] - -Signed-off-by: Raphael Isemann -Signed-off-by: Naveen Saini ---- - clang/lib/CodeGen/BackendUtil.cpp | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp -index 0bfcab88a3a9..db8fd4166d7a 100644 ---- a/clang/lib/CodeGen/BackendUtil.cpp -+++ b/clang/lib/CodeGen/BackendUtil.cpp -@@ -743,7 +743,15 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) { - BackendArgs.push_back("-limit-float-precision"); - BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str()); - } -+ // Check for the default "clang" invocation that won't set any cl::opt values. -+ // Skip trying to parse the command line invocation to avoid the issues -+ // described below. -+ if (BackendArgs.size() == 1) -+ return; - BackendArgs.push_back(nullptr); -+ // FIXME: The command line parser below is not thread-safe and shares a global -+ // state, so this call might crash or overwrite the options of another Clang -+ // instance in the same process. - llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1, - BackendArgs.data()); - } --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0007-support-cl_ext_float_atomics.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0007-support-cl_ext_float_atomics.patch deleted file mode 100644 index f7d191ff..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0007-support-cl_ext_float_atomics.patch +++ /dev/null @@ -1,377 +0,0 @@ -From a685de6fc45afcdbe4a7120e9d5b33e175dd71cd Mon Sep 17 00:00:00 2001 -From: haonanya -Date: Fri, 13 Aug 2021 10:00:02 +0800 -Subject: [PATCH 7/7] support cl_ext_float_atomics - -Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch] - -Signed-off-by: haonanya -Signed-off-by: Naveen Saini ---- - clang/lib/Headers/opencl-c-base.h | 25 ++++ - clang/lib/Headers/opencl-c.h | 208 ++++++++++++++++++++++++++ - clang/test/Headers/opencl-c-header.cl | 96 ++++++++++++ - 3 files changed, 329 insertions(+) - -diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h -index 2cc688ccc3da..86bbee12fdf8 100644 ---- a/clang/lib/Headers/opencl-c-base.h -+++ b/clang/lib/Headers/opencl-c-base.h -@@ -14,6 +14,31 @@ - #define CL_VERSION_3_0 300 - #endif - -+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -+// For SPIR all extensions are supported. -+#if defined(__SPIR__) -+#define cl_ext_float_atomics 1 -+#ifdef cl_khr_fp16 -+#define __opencl_c_ext_fp16_global_atomic_load_store 1 -+#define __opencl_c_ext_fp16_local_atomic_load_store 1 -+#define __opencl_c_ext_fp16_global_atomic_add 1 -+#define __opencl_c_ext_fp16_local_atomic_add 1 -+#define __opencl_c_ext_fp16_global_atomic_min_max 1 -+#define __opencl_c_ext_fp16_local_atomic_min_max 1 -+#endif -+#ifdef __opencl_c_fp64 -+#define __opencl_c_ext_fp64_global_atomic_add 1 -+#define __opencl_c_ext_fp64_local_atomic_add 1 -+#define __opencl_c_ext_fp64_global_atomic_min_max 1 -+#define __opencl_c_ext_fp64_local_atomic_min_max 1 -+#endif -+#define __opencl_c_ext_fp32_global_atomic_add 1 -+#define __opencl_c_ext_fp32_local_atomic_add 1 -+#define __opencl_c_ext_fp32_global_atomic_min_max 1 -+#define __opencl_c_ext_fp32_local_atomic_min_max 1 -+#endif // defined(__SPIR__) -+#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -+ - // Define features for 2.0 for header backward compatibility - #ifndef __opencl_c_int64 - #define __opencl_c_int64 1 -diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h -index 67d900eb1c3d..b463e702d95e 100644 ---- a/clang/lib/Headers/opencl-c.h -+++ b/clang/lib/Headers/opencl-c.h -@@ -14354,6 +14354,214 @@ intptr_t __ovld atomic_fetch_max_explicit( - // defined(cl_khr_int64_extended_atomics) - #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) - -+#if defined(cl_ext_float_atomics) -+ -+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) -+float __ovld atomic_fetch_min(volatile __global atomic_float *object, -+ float operand); -+float __ovld atomic_fetch_max(volatile __global atomic_float *object, -+ float operand); -+float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object, -+ float operand, memory_order order); -+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object, -+ float operand, memory_order order); -+float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object, -+ float operand, memory_order order, -+ memory_scope scope); -+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object, -+ float operand, memory_order order, -+ memory_scope scope); -+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) -+ -+#if defined(__opencl_c_ext_fp32_local_atomic_min_max) -+float __ovld atomic_fetch_min(volatile __local atomic_float *object, -+ float operand); -+float __ovld atomic_fetch_max(volatile __local atomic_float *object, -+ float operand); -+float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object, -+ float operand, memory_order order); -+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object, -+ float operand, memory_order order); -+float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object, -+ float operand, memory_order order, -+ memory_scope scope); -+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object, -+ float operand, memory_order order, -+ memory_scope scope); -+#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max) -+ -+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) || \ -+ defined(__opencl_c_ext_fp32_local_atomic_min_max) -+float __ovld atomic_fetch_min(volatile atomic_float *object, float operand); -+float __ovld atomic_fetch_max(volatile atomic_float *object, float operand); -+float __ovld atomic_fetch_min_explicit(volatile atomic_float *object, -+ float operand, memory_order order); -+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, -+ float operand, memory_order order); -+float __ovld atomic_fetch_min_explicit(volatile atomic_float *object, -+ float operand, memory_order order, -+ memory_scope scope); -+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, -+ float operand, memory_order order, -+ memory_scope scope); -+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) || \ -+ defined(__opencl_c_ext_fp32_local_atomic_min_max) -+ -+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) -+double __ovld atomic_fetch_min(volatile __global atomic_double *object, -+ double operand); -+double __ovld atomic_fetch_max(volatile __global atomic_double *object, -+ double operand); -+double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object, -+ double operand, memory_order order); -+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object, -+ double operand, memory_order order); -+double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object, -+ double operand, memory_order order, -+ memory_scope scope); -+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object, -+ double operand, memory_order order, -+ memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) -+ -+#if defined(__opencl_c_ext_fp64_local_atomic_min_max) -+double __ovld atomic_fetch_min(volatile __local atomic_double *object, -+ double operand); -+double __ovld atomic_fetch_max(volatile __local atomic_double *object, -+ double operand); -+double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object, -+ double operand, memory_order order); -+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object, -+ double operand, memory_order order); -+double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object, -+ double operand, memory_order order, -+ memory_scope scope); -+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object, -+ double operand, memory_order order, -+ memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max) -+ -+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) || \ -+ defined(__opencl_c_ext_fp64_local_atomic_min_max) -+double __ovld atomic_fetch_min(volatile atomic_double *object, double operand); -+double __ovld atomic_fetch_max(volatile atomic_double *object, double operand); -+double __ovld atomic_fetch_min_explicit(volatile atomic_double *object, -+ double operand, memory_order order); -+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, -+ double operand, memory_order order); -+double __ovld atomic_fetch_min_explicit(volatile atomic_double *object, -+ double operand, memory_order order, -+ memory_scope scope); -+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, -+ double operand, memory_order order, -+ memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) || \ -+ defined(__opencl_c_ext_fp64_local_atomic_min_max) -+ -+#if defined(__opencl_c_ext_fp32_global_atomic_add) -+float __ovld atomic_fetch_add(volatile __global atomic_float *object, -+ float operand); -+float __ovld atomic_fetch_sub(volatile __global atomic_float *object, -+ float operand); -+float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object, -+ float operand, memory_order order); -+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object, -+ float operand, memory_order order); -+float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object, -+ float operand, memory_order order, -+ memory_scope scope); -+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object, -+ float operand, memory_order order, -+ memory_scope scope); -+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) -+ -+#if defined(__opencl_c_ext_fp32_local_atomic_add) -+float __ovld atomic_fetch_add(volatile __local atomic_float *object, -+ float operand); -+float __ovld atomic_fetch_sub(volatile __local atomic_float *object, -+ float operand); -+float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object, -+ float operand, memory_order order); -+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object, -+ float operand, memory_order order); -+float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object, -+ float operand, memory_order order, -+ memory_scope scope); -+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object, -+ float operand, memory_order order, -+ memory_scope scope); -+#endif // defined(__opencl_c_ext_fp32_local_atomic_add) -+ -+#if defined(__opencl_c_ext_fp32_global_atomic_add) || \ -+ defined(__opencl_c_ext_fp32_local_atomic_add) -+float __ovld atomic_fetch_add(volatile atomic_float *object, float operand); -+float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand); -+float __ovld atomic_fetch_add_explicit(volatile atomic_float *object, -+ float operand, memory_order order); -+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, -+ float operand, memory_order order); -+float __ovld atomic_fetch_add_explicit(volatile atomic_float *object, -+ float operand, memory_order order, -+ memory_scope scope); -+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, -+ float operand, memory_order order, -+ memory_scope scope); -+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) || \ -+ defined(__opencl_c_ext_fp32_local_atomic_add) -+ -+#if defined(__opencl_c_ext_fp64_global_atomic_add) -+double __ovld atomic_fetch_add(volatile __global atomic_double *object, -+ double operand); -+double __ovld atomic_fetch_sub(volatile __global atomic_double *object, -+ double operand); -+double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object, -+ double operand, memory_order order); -+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object, -+ double operand, memory_order order); -+double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object, -+ double operand, memory_order order, -+ memory_scope scope); -+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object, -+ double operand, memory_order order, -+ memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) -+ -+#if defined(__opencl_c_ext_fp64_local_atomic_add) -+double __ovld atomic_fetch_add(volatile __local atomic_double *object, -+ double operand); -+double __ovld atomic_fetch_sub(volatile __local atomic_double *object, -+ double operand); -+double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object, -+ double operand, memory_order order); -+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object, -+ double operand, memory_order order); -+double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object, -+ double operand, memory_order order, -+ memory_scope scope); -+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object, -+ double operand, memory_order order, -+ memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_local_atomic_add) -+ -+#if defined(__opencl_c_ext_fp64_global_atomic_add) || \ -+ defined(__opencl_c_ext_fp64_local_atomic_add) -+double __ovld atomic_fetch_add(volatile atomic_double *object, double operand); -+double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand); -+double __ovld atomic_fetch_add_explicit(volatile atomic_double *object, -+ double operand, memory_order order); -+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, -+ double operand, memory_order order); -+double __ovld atomic_fetch_add_explicit(volatile atomic_double *object, -+ double operand, memory_order order, -+ memory_scope scope); -+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, -+ double operand, memory_order order, -+ memory_scope scope); -+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) || \ -+ defined(__opencl_c_ext_fp64_local_atomic_add) -+ -+#endif // cl_ext_float_atomics -+ - // atomic_store() - - #if defined(__opencl_c_atomic_scope_device) && \ -diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl -index 2716076acdcf..7f720cf28142 100644 ---- a/clang/test/Headers/opencl-c-header.cl -+++ b/clang/test/Headers/opencl-c-header.cl -@@ -98,3 +98,99 @@ global atomic_int z = ATOMIC_VAR_INIT(99); - #pragma OPENCL EXTENSION cl_intel_planar_yuv : enable - - // CHECK-MOD: Reading modules -+ -+// For SPIR all extensions are supported. -+#if defined(__SPIR__) -+ -+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -+ -+#if __opencl_c_ext_fp16_global_atomic_load_store != 1 -+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_load_store" -+#endif -+#if __opencl_c_ext_fp16_local_atomic_load_store != 1 -+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_load_store" -+#endif -+#if __opencl_c_ext_fp16_global_atomic_add != 1 -+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_add" -+#endif -+#if __opencl_c_ext_fp32_global_atomic_add != 1 -+#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_add" -+#endif -+#if __opencl_c_ext_fp64_global_atomic_add != 1 -+#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_add" -+#endif -+#if __opencl_c_ext_fp16_local_atomic_add != 1 -+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_add" -+#endif -+#if __opencl_c_ext_fp32_local_atomic_add != 1 -+#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_add" -+#endif -+#if __opencl_c_ext_fp64_local_atomic_add != 1 -+#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_add" -+#endif -+#if __opencl_c_ext_fp16_global_atomic_min_max != 1 -+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_min_max" -+#endif -+#if __opencl_c_ext_fp32_global_atomic_min_max != 1 -+#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_min_max" -+#endif -+#if __opencl_c_ext_fp64_global_atomic_min_max != 1 -+#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_min_max" -+#endif -+#if __opencl_c_ext_fp16_local_atomic_min_max != 1 -+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_min_max" -+#endif -+#if __opencl_c_ext_fp32_local_atomic_min_max != 1 -+#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_min_max" -+#endif -+#if __opencl_c_ext_fp64_local_atomic_min_max != 1 -+#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_min_max" -+#endif -+#else -+ -+#ifdef __opencl_c_ext_fp16_global_atomic_load_store -+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_load_store defined" -+#endif -+#ifdef __opencl_c_ext_fp16_local_atomic_load_store -+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_load_store defined" -+#endif -+#ifdef __opencl_c_ext_fp16_global_atomic_add -+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_add defined" -+#endif -+#ifdef __opencl_c_ext_fp32_global_atomic_add -+#error "Incorrectly __opencl_c_ext_fp32_global_atomic_add defined" -+#endif -+#ifdef __opencl_c_ext_fp64_global_atomic_add -+#error "Incorrectly __opencl_c_ext_fp64_global_atomic_add defined" -+#endif -+#ifdef __opencl_c_ext_fp16_local_atomic_add -+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_add defined" -+#endif -+#ifdef __opencl_c_ext_fp32_local_atomic_add -+#error "Incorrectly __opencl_c_ext_fp32_local_atomic_add defined" -+#endif -+#ifdef __opencl_c_ext_fp64_local_atomic_add -+#error "Incorrectly __opencl_c_ext_fp64_local_atomic_add defined" -+#endif -+#ifdef __opencl_c_ext_fp16_global_atomic_min_max -+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_min_max defined" -+#endif -+#ifdef __opencl_c_ext_fp32_global_atomic_min_max -+#error "Incorrectly __opencl_c_ext_fp32_global_atomic_min_max defined" -+#endif -+#ifdef __opencl_c_ext_fp64_global_atomic_min_max -+#error "Incorrectly __opencl_c_ext_fp64_global_atomic_min_max defined" -+#endif -+#ifdef __opencl_c_ext_fp16_local_atomic_min_max -+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_min_max defined" -+#endif -+#ifdef __opencl_c_ext_fp32_local_atomic_min_max -+#error "Incorrectly __opencl_c_ext_fp32_local_atomic_min_max defined" -+#endif -+#ifdef __opencl_c_ext_fp64_local_atomic_min_max -+#error "Incorrectly __opencl_c_ext_fp64_local_atomic_min_max defined" -+#endif -+ -+#endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -+ -+#endif // defined(__SPIR__) --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch deleted file mode 100644 index 09be8202..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 294ca2fd69a077b35acec9d498120d6cb0324dae Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Fri, 27 Aug 2021 11:53:27 +0800 -Subject: [PATCH 1/2] This patch is required to fix the crash referenced to in - #1767 - -It is a port of the following llvm 11.0 commit : https://reviews.llvm.org/D76994. - -Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/41f13f1f64d2074ae7512fb23656c22585e912bd] - -Signed-off-by: Naveen Saini ---- - .../CodeGen/SelectionDAG/LegalizeTypes.cpp | 3 +- - llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 31 ++++++++++++------- - 2 files changed, 21 insertions(+), 13 deletions(-) - -diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp -index 63ddb59fce68..822da2183269 100644 ---- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp -+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp -@@ -173,7 +173,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { - } - } - } -- -+#ifndef NDEBUG - // Checked that NewNodes are only used by other NewNodes. - for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) { - SDNode *N = NewNodes[i]; -@@ -181,6 +181,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { - UI != UE; ++UI) - assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!"); - } -+#endif - } - - /// This is the main entry point for the type legalizer. This does a top-down -diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h -index faae14444d51..b908c5c58e9f 100644 ---- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h -+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h -@@ -155,7 +155,9 @@ private: - const SDValue &getSDValue(TableId &Id) { - RemapId(Id); - assert(Id && "TableId should be non-zero"); -- return IdToValueMap[Id]; -+ auto I = IdToValueMap.find(Id); -+ assert(I != IdToValueMap.end() && "cannot find Id in map"); -+ return I->second; - } - - public: -@@ -172,24 +174,29 @@ public: - bool run(); - - void NoteDeletion(SDNode *Old, SDNode *New) { -+ assert(Old != New && "node replaced with self"); - for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { - TableId NewId = getTableId(SDValue(New, i)); - TableId OldId = getTableId(SDValue(Old, i)); - -- if (OldId != NewId) -+ if (OldId != NewId) { - ReplacedValues[OldId] = NewId; - -- // Delete Node from tables. -+ // Delete Node from tables. We cannot do this when OldId == NewId, -+ // because NewId can still have table references to it in -+ // ReplacedValues. -+ IdToValueMap.erase(OldId); -+ PromotedIntegers.erase(OldId); -+ ExpandedIntegers.erase(OldId); -+ SoftenedFloats.erase(OldId); -+ PromotedFloats.erase(OldId); -+ ExpandedFloats.erase(OldId); -+ ScalarizedVectors.erase(OldId); -+ SplitVectors.erase(OldId); -+ WidenedVectors.erase(OldId); -+ } -+ - ValueToIdMap.erase(SDValue(Old, i)); -- IdToValueMap.erase(OldId); -- PromotedIntegers.erase(OldId); -- ExpandedIntegers.erase(OldId); -- SoftenedFloats.erase(OldId); -- PromotedFloats.erase(OldId); -- ExpandedFloats.erase(OldId); -- ScalarizedVectors.erase(OldId); -- SplitVectors.erase(OldId); -- WidenedVectors.erase(OldId); - } - } - --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch deleted file mode 100644 index 72877d83..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch +++ /dev/null @@ -1,105 +0,0 @@ -From d266087e8dba9e8fd4984e1cb85c20376e2c8ea3 Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Fri, 27 Aug 2021 11:56:01 +0800 -Subject: [PATCH 2/2] This patch is a fix for #1788. - -It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D81698 -This also needed part of another llvm 11.0 commit: https://reviews.llvm.org/D72975 - -Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/aeb50448019ce1b1002f3781f9647d486320d83c] - -Signed-off-by: Naveen Saini ---- - llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++++++--- - .../InstCombine/InstructionCombining.cpp | 27 +++++++++++++++++-- - 2 files changed, 44 insertions(+), 5 deletions(-) - -diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h -index 6621fc9f819c..fb7ad93519f6 100644 ---- a/llvm/include/llvm/IR/PatternMatch.h -+++ b/llvm/include/llvm/IR/PatternMatch.h -@@ -152,8 +152,10 @@ inline match_combine_and m_CombineAnd(const LTy &L, const RTy &R) { - - struct apint_match { - const APInt *&Res; -+ bool AllowUndef; - -- apint_match(const APInt *&R) : Res(R) {} -+ apint_match(const APInt *&Res, bool AllowUndef) -+ : Res(Res), AllowUndef(AllowUndef) {} - - template bool match(ITy *V) { - if (auto *CI = dyn_cast(V)) { -@@ -162,7 +164,8 @@ struct apint_match { - } - if (V->getType()->isVectorTy()) - if (const auto *C = dyn_cast(V)) -- if (auto *CI = dyn_cast_or_null(C->getSplatValue())) { -+ if (auto *CI = dyn_cast_or_null( -+ C->getSplatValue(AllowUndef))) { - Res = &CI->getValue(); - return true; - } -@@ -192,7 +195,20 @@ struct apfloat_match { - - /// Match a ConstantInt or splatted ConstantVector, binding the - /// specified pointer to the contained APInt. --inline apint_match m_APInt(const APInt *&Res) { return Res; } -+inline apint_match m_APInt(const APInt *&Res) { -+ // Forbid undefs by default to maintain previous behavior. -+ return apint_match(Res, /* AllowUndef */ false); -+} -+ -+/// Match APInt while allowing undefs in splat vector constants. -+inline apint_match m_APIntAllowUndef(const APInt *&Res) { -+ return apint_match(Res, /* AllowUndef */ true); -+} -+ -+/// Match APInt while forbidding undefs in splat vector constants. -+inline apint_match m_APIntForbidUndef(const APInt *&Res) { -+ return apint_match(Res, /* AllowUndef */ false); -+} - - /// Match a ConstantFP or splatted ConstantVector, binding the - /// specified pointer to the contained APFloat. -diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp -index bf32996d96e2..40a246b9d7a7 100644 ---- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp -+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp -@@ -925,8 +925,31 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { - if (auto *CI = dyn_cast(SI->getCondition())) { - if (CI->hasOneUse()) { - Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); -- if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || -- (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) -+ -+ // FIXME: This is a hack to avoid infinite looping with min/max patterns. -+ // We have to ensure that vector constants that only differ with -+ // undef elements are treated as equivalent. -+ auto areLooselyEqual = [](Value *A, Value *B) { -+ if (A == B) -+ return true; -+ -+ // Test for vector constants. -+ Constant *ConstA, *ConstB; -+ if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB))) -+ return false; -+ -+ // TODO: Deal with FP constants? -+ if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType()) -+ return false; -+ -+ // Compare for equality including undefs as equal. -+ auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB); -+ const APInt *C; -+ return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue(); -+ }; -+ -+ if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) || -+ (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1))) - return nullptr; - } - } --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch deleted file mode 100644 index fc6935a1..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 8f83e2b7618da7a98a30839a8f41a6dd82dec468 Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Fri, 27 Aug 2021 12:00:23 +0800 -Subject: [PATCH 1/2] This patch is required to fix stability problem #1793 - -It's backport of the following llvm 11.0 commit: 120c5f1057dc50229f73bc75bbabf4df6ee50fef - -Upstream-Status: Backport - -Signed-off-by: Naveen Saini ---- - llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -index 2476fd26f250..2743acc89bca 100644 ---- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -@@ -10702,8 +10702,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - -+ // zext_vector_inreg(undef) = 0 because the top bits will be zero. - if (N0.isUndef()) -- return DAG.getUNDEF(VT); -+ return DAG.getConstant(0, SDLoc(N), VT); - - if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) - return Res; -@@ -10718,8 +10719,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - -+ // sext_vector_inreg(undef) = 0 because the top bit will all be the same. - if (N0.isUndef()) -- return DAG.getUNDEF(VT); -+ return DAG.getConstant(0, SDLoc(N), VT); - - if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) - return Res; --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch deleted file mode 100644 index e3e70107..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 62b05a69b4a185cd0b7535f19742686e19fcaf22 Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Fri, 27 Aug 2021 12:02:37 +0800 -Subject: [PATCH 2/2] Fix for #1844, affects avx512skx-i8x64 and - avx512skx-i16x32. - -It's a port of 11.0 commit edcfb47ff6d5562e22207f364c65f84302aa346b -https://reviews.llvm.org/D76312 - -Upstream-Status: Backport - -Signed-off-by: Naveen Saini ---- - llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -index 2743acc89bca..439a8367dabe 100644 ---- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -@@ -10841,7 +10841,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { - - // Attempt to pre-truncate BUILD_VECTOR sources. - if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations && -- TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) { -+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) && -+ // Avoid creating illegal types if running after type legalizer. -+ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) { - SDLoc DL(N); - EVT SVT = VT.getScalarType(); - SmallVector TruncOps; --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch deleted file mode 100644 index 8aca5fbf..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch +++ /dev/null @@ -1,40 +0,0 @@ -From cc4301f82ca1bde1d438c3708de285b0ab8c72d3 Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Fri, 27 Aug 2021 12:07:25 +0800 -Subject: [PATCH 1/2] [X86] createVariablePermute - handle case where recursive - createVariablePermute call fails - -Account for the case where a recursive createVariablePermute call with a wider vector type fails. - -Original test case from @craig.topper (Craig Topper) - -Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/6bdd63dc28208a597542b0c6bc41093f32417804] - -Signed-off-by: Simon Pilgrim -Signed-off-by: Naveen Saini ---- - llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp -index c8720d9ae3a6..63eb050e9b3a 100644 ---- a/llvm/lib/Target/X86/X86ISelLowering.cpp -+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp -@@ -9571,9 +9571,11 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec, - IndicesVT = EVT(VT).changeVectorElementTypeToInteger(); - IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false, - Subtarget, DAG, SDLoc(IndicesVec)); -- return extractSubVector( -- createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0, -- DAG, DL, SizeInBits); -+ SDValue NewSrcVec = -+ createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget); -+ if (NewSrcVec) -+ return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits); -+ return SDValue(); - } else if (SrcVec.getValueSizeInBits() < SizeInBits) { - // Widen smaller SrcVec to match VT. - SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec)); --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch deleted file mode 100644 index e03c279f..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 9cdff0785d5cf9effc8e922d3330311c4d3dda78 Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Fri, 27 Aug 2021 12:09:42 +0800 -Subject: [PATCH 2/2] This patch is needed for avx512skx-i8x64 and - avx512skx-i16x32 targets. - -This is combination of two commits: -- 0cd6712a7af0fa2702b5d4cc733500eb5e62e7d0 - stability fix. -- d8ad7cc0885f32104a7cd83c77191aec15fd684f - performance follow up. - -Upstream-Status: Backport - -Signed-off-by: Naveen Saini ---- - llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 +++++++++++++++++-- - 1 file changed, 21 insertions(+), 2 deletions(-) - -diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -index 439a8367dabe..b1639c7f275d 100644 ---- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -@@ -18471,6 +18471,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { - - // Allow targets to opt-out. - EVT VT = Extract->getValueType(0); -+ -+ // We can only create byte sized loads. -+ if (!VT.isByteSized()) -+ return SDValue(); -+ -+ unsigned Index = ExtIdx->getZExtValue(); -+ unsigned NumElts = VT.getVectorNumElements(); -+ -+ // If the index is a multiple of the extract element count, we can offset the -+ // address by the store size multiplied by the subvector index. Otherwise if -+ // the scalar type is byte sized, we can just use the index multiplied by -+ // the element size in bytes as the offset. -+ unsigned Offset; -+ if (Index % NumElts == 0) -+ Offset = (Index / NumElts) * VT.getStoreSize(); -+ else if (VT.getScalarType().isByteSized()) -+ Offset = Index * VT.getScalarType().getStoreSize(); -+ else -+ return SDValue(); -+ - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT)) - return SDValue(); -@@ -18478,8 +18498,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { - // The narrow load will be offset from the base address of the old load if - // we are extracting from something besides index 0 (little-endian). - SDLoc DL(Extract); -- SDValue BaseAddr = Ld->getOperand(1); -- unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); -+ SDValue BaseAddr = Ld->getBasePtr(); - - // TODO: Use "BaseIndexOffset" to make this more effective. - SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch deleted file mode 100644 index d1768216..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch +++ /dev/null @@ -1,97 +0,0 @@ -From c2ebd328979c081dd2c9fd0e359ed99473731d0e Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Fri, 27 Aug 2021 12:13:00 +0800 -Subject: [PATCH 1/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we - store zeros in the rest of the byte - -We can't store garbage in the unused bits. It possible that something like zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0. - -I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment so I wanted to focus on the correctness issue. - -Should fix PR48147. - -Reviewed By: RKSimon - -Differential Revision: https://reviews.llvm.org/D9129 - -Upstream-Status: Backport - -Signed-off-by:Craig Topper -Signed-off-by: Naveen Saini ---- - llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++------ - llvm/lib/Target/X86/X86InstrAVX512.td | 2 -- - 2 files changed, 14 insertions(+), 8 deletions(-) - -diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp -index 63eb050e9b3a..96b5e2cfbd82 100644 ---- a/llvm/lib/Target/X86/X86ISelLowering.cpp -+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp -@@ -22688,17 +22688,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, - // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores. - if (StoredVal.getValueType().isVector() && - StoredVal.getValueType().getVectorElementType() == MVT::i1) { -- assert(StoredVal.getValueType().getVectorNumElements() <= 8 && -- "Unexpected VT"); -+ unsigned NumElts = StoredVal.getValueType().getVectorNumElements(); -+ assert(NumElts <= 8 && "Unexpected VT"); - assert(!St->isTruncatingStore() && "Expected non-truncating store"); - assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() && - "Expected AVX512F without AVX512DQI"); - -+ // We must pad with zeros to ensure we store zeroes to any unused bits. - StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, - DAG.getUNDEF(MVT::v16i1), StoredVal, - DAG.getIntPtrConstant(0, dl)); - StoredVal = DAG.getBitcast(MVT::i16, StoredVal); - StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal); -+ // Make sure we store zeros in the extra bits. -+ if (NumElts < 8) -+ StoredVal = DAG.getZeroExtendInReg(StoredVal, dl, -+ MVT::getIntegerVT(NumElts)); - - return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), - St->getPointerInfo(), St->getAlignment(), -@@ -41585,8 +41590,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, - - EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements()); - StoredVal = DAG.getBitcast(NewVT, StoredVal); -- -- return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), -+ SDValue Val = StoredVal.getOperand(0); -+ // We must store zeros to the unused bits. -+ Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1); -+ return DAG.getStore(St->getChain(), dl, Val, St->getBasePtr(), - St->getPointerInfo(), St->getAlignment(), - St->getMemOperand()->getFlags()); - } -@@ -41602,10 +41609,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, - } - - // Widen v2i1/v4i1 stores to v8i1. -- if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT && -+ if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT && - Subtarget.hasAVX512()) { - unsigned NumConcats = 8 / VT.getVectorNumElements(); -- SmallVector Ops(NumConcats, DAG.getUNDEF(VT)); -+ // We must store zeros to the unused bits. -+ SmallVector Ops(NumConcats, DAG.getConstant(0, dl, VT)); - Ops[0] = StoredVal; - StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); - return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), -diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td -index 32f012033fb0..d3b92183f87b 100644 ---- a/llvm/lib/Target/X86/X86InstrAVX512.td -+++ b/llvm/lib/Target/X86/X86InstrAVX512.td -@@ -2888,8 +2888,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), - - // Load/store kreg - let Predicates = [HasDQI] in { -- def : Pat<(store VK1:$src, addr:$dst), -- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>; - - def : Pat<(v1i1 (load addr:$src)), - (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>; --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch deleted file mode 100644 index 03b40e9b..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch +++ /dev/null @@ -1,173 +0,0 @@ -From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Fri, 27 Aug 2021 12:15:09 +0800 -Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via - getRegisterTypeForCallingConv rather than using CCPromoteToType in the td - file - - Previously we tried to promote these to xmm/ymm/zmm by promoting - in the X86CallingConv.td file. But this breaks when we run out - of xmm/ymm/zmm registers and need to fall back to memory. We end - up trying to create a non-sensical scalar to vector. This lead - to an assertion. The new tests in avx512-calling-conv.ll all - trigger this assertion. - - Since we really want to treat these types like we do on avx2, - it seems better to promote them before the calling convention - code gets involved. Except when the calling convention is one - that passes the vXi1 type in a k register. - - The changes in avx512-regcall-Mask.ll are because we indicated - that xmm/ymm/zmm types should be passed indirectly for the - Win64 ABI before we go to the common lines that promoted the - vXi1 types. This caused the promoted types to be picked up by - the default calling convention code. Now we promote them earlier - so they get passed indirectly as though they were xmm/ymm/zmm. - - Differential Revision: https://reviews.llvm.org/D75154 - -Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/eadea7868f5b7542ee6bdcd9a975697a0c919ffc] - -Signed-off-by:Craig Topper -Signed-off-by: Naveen Saini ---- - llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++-------- - 1 file changed, 61 insertions(+), 29 deletions(-) - -diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp -index 96b5e2cfbd82..d5de94aeb8a2 100644 ---- a/llvm/lib/Target/X86/X86ISelLowering.cpp -+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp -@@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const { - return TargetLoweringBase::getPreferredVectorAction(VT); - } - -+static std::pair -+handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, -+ const X86Subtarget &Subtarget) { -+ // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling -+ // convention is one that uses k registers. -+ if (NumElts == 2) -+ return {MVT::v2i64, 1}; -+ if (NumElts == 4) -+ return {MVT::v4i32, 1}; -+ if (NumElts == 8 && CC != CallingConv::X86_RegCall && -+ CC != CallingConv::Intel_OCL_BI) -+ return {MVT::v8i16, 1}; -+ if (NumElts == 16 && CC != CallingConv::X86_RegCall && -+ CC != CallingConv::Intel_OCL_BI) -+ return {MVT::v16i8, 1}; -+ // v32i1 passes in ymm unless we have BWI and the calling convention is -+ // regcall. -+ if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall)) -+ return {MVT::v32i8, 1}; -+ // Split v64i1 vectors if we don't have v64i8 available. -+ if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) { -+ if (Subtarget.useAVX512Regs()) -+ return {MVT::v64i8, 1}; -+ return {MVT::v32i8, 2}; -+ } -+ -+ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. -+ if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) || -+ NumElts > 64) -+ return {MVT::i8, NumElts}; -+ -+ return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0}; -+} -+ - MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, - CallingConv::ID CC, - EVT VT) const { -- // v32i1 vectors should be promoted to v32i8 to match avx2. -- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) -- return MVT::v32i8; -- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. - if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && -- Subtarget.hasAVX512() && -- (!isPowerOf2_32(VT.getVectorNumElements()) || -- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || -- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) -- return MVT::i8; -- // Split v64i1 vectors if we don't have v64i8 available. -- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && -- CC != CallingConv::X86_RegCall) -- return MVT::v32i1; -+ Subtarget.hasAVX512()) { -+ unsigned NumElts = VT.getVectorNumElements(); -+ -+ MVT RegisterVT; -+ unsigned NumRegisters; -+ std::tie(RegisterVT, NumRegisters) = -+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); -+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) -+ return RegisterVT; -+ } -+ - // FIXME: Should we just make these types legal and custom split operations? - if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI && - Subtarget.useAVX512Regs() && !Subtarget.hasBWI()) - return MVT::v16i32; -+ - return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); - } - - unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, - CallingConv::ID CC, - EVT VT) const { -- // v32i1 vectors should be promoted to v32i8 to match avx2. -- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) -- return 1; -- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. - if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && -- Subtarget.hasAVX512() && -- (!isPowerOf2_32(VT.getVectorNumElements()) || -- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || -- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) -- return VT.getVectorNumElements(); -- // Split v64i1 vectors if we don't have v64i8 available. -- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && -- CC != CallingConv::X86_RegCall) -- return 2; -+ Subtarget.hasAVX512()) { -+ unsigned NumElts = VT.getVectorNumElements(); -+ -+ MVT RegisterVT; -+ unsigned NumRegisters; -+ std::tie(RegisterVT, NumRegisters) = -+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); -+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) -+ return NumRegisters; -+ } -+ - // FIXME: Should we just make these types legal and custom split operations? - if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI && - Subtarget.useAVX512Regs() && !Subtarget.hasBWI()) - return 1; -+ - return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); - } - -@@ -2140,8 +2172,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( - if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && - Subtarget.hasAVX512() && - (!isPowerOf2_32(VT.getVectorNumElements()) || -- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || -- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) { -+ (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) || -+ VT.getVectorNumElements() > 64)) { - RegisterVT = MVT::i8; - IntermediateVT = MVT::i1; - NumIntermediates = VT.getVectorNumElements(); -@@ -2151,7 +2183,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( - // Split v64i1 vectors if we don't have v64i8 available. - if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && - CC != CallingConv::X86_RegCall) { -- RegisterVT = MVT::v32i1; -+ RegisterVT = MVT::v32i8; - IntermediateVT = MVT::v32i1; - NumIntermediates = 2; - return 2; --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch deleted file mode 100644 index 2e3872dc..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch +++ /dev/null @@ -1,550 +0,0 @@ -From 447cb2e1b2f0d8bdcfd8a0b39f47d28de50b5d82 Mon Sep 17 00:00:00 2001 -From: Djordje Todorovic -Date: Mon, 9 Mar 2020 11:02:35 +0100 -Subject: [PATCH] Enable the call site info only for -g + optimizations - -Emit call site info only in the case of '-g' + 'O>0' level. - -Differential Revision: https://reviews.llvm.org/D75175 - -Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/c15c68abdc6f1afece637bdedba808676191a8e6] -Signed-off-by: Anuj Mittal ---- - clang/include/clang/Basic/CodeGenOptions.def | 2 ++ - clang/lib/CodeGen/BackendUtil.cpp | 1 + - clang/lib/Frontend/CompilerInvocation.cpp | 4 +++- - llvm/include/llvm/CodeGen/CommandFlags.inc | 7 +++++++ - llvm/include/llvm/Target/TargetOptions.h | 7 ++++++- - llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 4 ++-- - llvm/lib/CodeGen/MachineFunction.cpp | 2 +- - llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 2 +- - llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +- - llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +- - llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- - llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir | 2 +- - llvm/test/CodeGen/X86/call-site-info-output.ll | 4 ++-- - llvm/test/DebugInfo/AArch64/call-site-info-output.ll | 2 +- - llvm/test/DebugInfo/ARM/call-site-info-output.ll | 2 +- - .../MIR/AArch64/dbgcall-site-interpret-movzxi.mir | 2 +- - .../DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir | 2 +- - llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir | 2 +- - .../test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir | 2 +- - .../test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir | 2 +- - .../DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir | 2 +- - llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir | 2 +- - .../MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir | 2 +- - .../MIR/Hexagon/live-debug-values-bundled-entry-values.mir | 2 +- - llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir | 2 +- - llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir | 2 +- - llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir | 2 +- - .../test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir | 2 +- - .../test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir | 2 +- - .../DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir | 2 +- - llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir | 2 +- - .../DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir | 2 +- - llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir | 2 +- - llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir | 4 ++-- - .../DebugInfo/MIR/X86/entry-value-of-modified-param.mir | 2 +- - llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir | 2 +- - .../DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir | 2 +- - .../test/DebugInfo/MIR/X86/unreachable-block-call-site.mir | 2 +- - llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll | 2 +- - llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll | 2 +- - .../tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll | 2 +- - 41 files changed, 58 insertions(+), 41 deletions(-) - -diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def -index 1ecae98b13b1..6a6a9465273f 100644 ---- a/clang/include/clang/Basic/CodeGenOptions.def -+++ b/clang/include/clang/Basic/CodeGenOptions.def -@@ -64,6 +64,8 @@ CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new - ///< pass manager. - CODEGENOPT(DisableRedZone , 1, 0) ///< Set when -mno-red-zone is enabled. - CODEGENOPT(EnableDebugEntryValues, 1, 0) ///< Emit call site parameter dbg info -+CODEGENOPT(EmitCallSiteInfo, 1, 0) ///< Emit call site info only in the case of -+ ///< '-g' + 'O>0' level. - CODEGENOPT(IndirectTlsSegRefs, 1, 0) ///< Set when -mno-tls-direct-seg-refs - ///< is specified. - CODEGENOPT(DisableTailCalls , 1, 0) ///< Do not emit tail calls. -diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp -index db8fd4166d7a..db09f9b641fe 100644 ---- a/clang/lib/CodeGen/BackendUtil.cpp -+++ b/clang/lib/CodeGen/BackendUtil.cpp -@@ -482,6 +482,7 @@ static void initTargetOptions(llvm::TargetOptions &Options, - Options.EmitAddrsig = CodeGenOpts.Addrsig; - Options.EnableDebugEntryValues = CodeGenOpts.EnableDebugEntryValues; - Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection; -+ Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo; - - Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; - Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; -diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp -index 18fa06bf3c6d..2e73dcbdebe4 100644 ---- a/clang/lib/Frontend/CompilerInvocation.cpp -+++ b/clang/lib/Frontend/CompilerInvocation.cpp -@@ -789,8 +789,10 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, - - llvm::Triple T(TargetOpts.Triple); - if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() && -- llvm::is_contained(DebugEntryValueArchs, T.getArch())) -+ llvm::is_contained(DebugEntryValueArchs, T.getArch())) { - Opts.EnableDebugEntryValues = Args.hasArg(OPT_femit_debug_entry_values); -+ Opts.EmitCallSiteInfo = true; -+ } - - Opts.DisableO0ImplyOptNone = Args.hasArg(OPT_disable_O0_optnone); - Opts.DisableRedZone = Args.hasArg(OPT_disable_red_zone); -diff --git a/llvm/include/llvm/CodeGen/CommandFlags.inc b/llvm/include/llvm/CodeGen/CommandFlags.inc -index 6475a5b19edb..36073fe9cc98 100644 ---- a/llvm/include/llvm/CodeGen/CommandFlags.inc -+++ b/llvm/include/llvm/CodeGen/CommandFlags.inc -@@ -286,6 +286,12 @@ static cl::opt - EnableAddrsig("addrsig", cl::desc("Emit an address-significance table"), - cl::init(false)); - -+static cl::opt EmitCallSiteInfo( -+ "emit-call-site-info", -+ cl::desc( -+ "Emit call site debug information, if debug information is enabled."), -+ cl::init(false)); -+ - static cl::opt - EnableDebugEntryValues("debug-entry-values", - cl::desc("Emit debug info about parameter's entry values"), -@@ -349,6 +355,7 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() { - Options.ExceptionModel = ExceptionModel; - Options.EmitStackSizeSection = EnableStackSizeSection; - Options.EmitAddrsig = EnableAddrsig; -+ Options.EmitCallSiteInfo = EmitCallSiteInfo; - Options.EnableDebugEntryValues = EnableDebugEntryValues; - Options.ForceDwarfFrameSection = ForceDwarfFrameSection; - -diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h -index d27c7b0178f0..9378e290bed1 100644 ---- a/llvm/include/llvm/Target/TargetOptions.h -+++ b/llvm/include/llvm/Target/TargetOptions.h -@@ -134,7 +134,8 @@ namespace llvm { - EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false), - EmitStackSizeSection(false), EnableMachineOutliner(false), - SupportsDefaultOutlining(false), EmitAddrsig(false), -- EnableDebugEntryValues(false), ForceDwarfFrameSection(false) {} -+ EmitCallSiteInfo(false), EnableDebugEntryValues(false), -+ ForceDwarfFrameSection(false) {} - - /// PrintMachineCode - This flag is enabled when the -print-machineinstrs - /// option is specified on the command line, and should enable debugging -@@ -281,6 +282,10 @@ namespace llvm { - /// to selectively generate basic block sections. - std::shared_ptr BBSectionsFuncListBuf; - -+ /// The flag enables call site info production. It is used only for debug -+ /// info, and it is restricted only to optimized code. This can be used for -+ /// something else, so that should be controlled in the frontend. -+ unsigned EmitCallSiteInfo : 1; - /// Emit debug info about parameter's entry values. - unsigned EnableDebugEntryValues : 1; - -diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp -index 10157c746b46..f955bdc6186a 100644 ---- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp -+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp -@@ -381,11 +381,11 @@ bool MIRParserImpl::initializeCallSiteInfo( - CSInfo.emplace_back(Reg, ArgRegPair.ArgNo); - } - -- if (TM.Options.EnableDebugEntryValues) -+ if (TM.Options.EmitCallSiteInfo) - MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo)); - } - -- if (YamlMF.CallSitesInfo.size() && !TM.Options.EnableDebugEntryValues) -+ if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo) - return error(Twine("Call site info provided but not used")); - return false; - } -diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp -index 4612690644fe..c3795b7ed314 100644 ---- a/llvm/lib/CodeGen/MachineFunction.cpp -+++ b/llvm/lib/CodeGen/MachineFunction.cpp -@@ -855,7 +855,7 @@ MachineFunction::CallSiteInfoMap::iterator - MachineFunction::getCallSiteInfo(const MachineInstr *MI) { - assert(MI->isCall() && "Call site info refers only to call instructions!"); - -- if (!Target.Options.EnableDebugEntryValues) -+ if (!Target.Options.EmitCallSiteInfo) - return CallSitesInfo.end(); - return CallSitesInfo.find(MI); - } -diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp -index 0e4d783e3505..52099f24aca5 100644 ---- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp -+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp -@@ -863,7 +863,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { - MI = &*std::next(Before); - } - -- if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues) -+ if (MI->isCall() && DAG->getTarget().Options.EmitCallSiteInfo) - MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node)); - - return MI; -diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp -index 23f05eaad944..63ff3031a5e8 100644 ---- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp -+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp -@@ -4132,7 +4132,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, - RegsToPass.emplace_back(VA.getLocReg(), Arg); - RegsUsed.insert(VA.getLocReg()); - const TargetOptions &Options = DAG.getTarget().Options; -- if (Options.EnableDebugEntryValues) -+ if (Options.EmitCallSiteInfo) - CSInfo.emplace_back(VA.getLocReg(), i); - } - } else { -diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp -index 9f504b1eaa42..5589ba34a2ac 100644 ---- a/llvm/lib/Target/ARM/ARMISelLowering.cpp -+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp -@@ -2222,7 +2222,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, - isThisReturn = true; - } - const TargetOptions &Options = DAG.getTarget().Options; -- if (Options.EnableDebugEntryValues) -+ if (Options.EmitCallSiteInfo) - CSInfo.emplace_back(VA.getLocReg(), i); - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else if (isByVal) { -diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp -index d5de94aeb8a2..4808bdf6ddc2 100644 ---- a/llvm/lib/Target/X86/X86ISelLowering.cpp -+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp -@@ -4030,7 +4030,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, - } else if (VA.isRegLoc()) { - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - const TargetOptions &Options = DAG.getTarget().Options; -- if (Options.EnableDebugEntryValues) -+ if (Options.EmitCallSiteInfo) - CSInfo.emplace_back(VA.getLocReg(), I); - if (isVarArg && IsWin64) { - // Win64 ABI requires argument XMM reg to be copied to the corresponding -diff --git a/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir b/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir -index 5ffa0293a2e1..fec542223fc9 100644 ---- a/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir -+++ b/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s - - # Verify that it is possible to read and write MIR where a callSites entry - # points to a call residing in a bundle. The offset should point to the call -diff --git a/llvm/test/CodeGen/X86/call-site-info-output.ll b/llvm/test/CodeGen/X86/call-site-info-output.ll -index 4b1e236aadfe..a0438f0c2b98 100644 ---- a/llvm/test/CodeGen/X86/call-site-info-output.ll -+++ b/llvm/test/CodeGen/X86/call-site-info-output.ll -@@ -1,6 +1,6 @@ - ; Test call site info MIR printer and parser.Parser assertions and machine - ; verifier will check the rest; --; RUN: llc -debug-entry-values %s -stop-before=finalize-isel -o %t.mir -+; RUN: llc -emit-call-site-info -debug-entry-values %s -stop-before=finalize-isel -o %t.mir - ; RUN: cat %t.mir | FileCheck %s - ; CHECK: name: fn2 - ; CHECK: callSites: -@@ -10,7 +10,7 @@ - ; CHECK-NEXT: arg: 0, reg: '$edi' - ; CHECK-NEXT: arg: 1, reg: '$esi' - ; CHECK-NEXT: arg: 2, reg: '$edx' --; RUN: llc -debug-entry-values %t.mir -run-pass=finalize-isel -o -| FileCheck %s --check-prefix=PARSER -+; RUN: llc -emit-call-site-info -debug-entry-values %t.mir -run-pass=finalize-isel -o -| FileCheck %s --check-prefix=PARSER - ; Verify that we are able to parse output mir and that we are getting the same result. - ; PARSER: name: fn2 - ; PARSER: callSites: -diff --git a/llvm/test/DebugInfo/AArch64/call-site-info-output.ll b/llvm/test/DebugInfo/AArch64/call-site-info-output.ll -index d52d6962f3c4..17d9f7f18762 100644 ---- a/llvm/test/DebugInfo/AArch64/call-site-info-output.ll -+++ b/llvm/test/DebugInfo/AArch64/call-site-info-output.ll -@@ -1,4 +1,4 @@ --; RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s -+; RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s - ; Verify that Selection DAG knows how to recognize simple function parameter forwarding registers. - ; Produced from: - ; extern int fn1(int,int,int); -diff --git a/llvm/test/DebugInfo/ARM/call-site-info-output.ll b/llvm/test/DebugInfo/ARM/call-site-info-output.ll -index 9255a7d57dde..ed726dfe753f 100644 ---- a/llvm/test/DebugInfo/ARM/call-site-info-output.ll -+++ b/llvm/test/DebugInfo/ARM/call-site-info-output.ll -@@ -1,4 +1,4 @@ --; RUN: llc -mtriple arm-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s -+; RUN: llc -emit-call-site-info -mtriple arm-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s - ; Verify that Selection DAG knows how to recognize simple function parameter forwarding registers. - ; Produced from: - ; extern int fn1(int,int,int); -diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir -index dc7561ca6400..057779a90721 100644 ---- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir -+++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir -@@ -1,4 +1,4 @@ --# RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s -+# RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s - # - # Based on the following C reproducer: - # -diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir -index 0371ccef603e..d925bc395878 100644 ---- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir -+++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir -@@ -1,4 +1,4 @@ --# RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s -+# RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s - # Following code is used for producing this test case. Note that - # some of argument loading instruction are modified in order to - # cover certain cases. -diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir -index 916a14022ba5..4a87dad3b9b5 100644 ---- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir -+++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -start-after=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s - - # Based on the following C reproducer: - # -diff --git a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir -index fbf9b3454689..d85f2d25391d 100644 ---- a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir -+++ b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir -@@ -1,4 +1,4 @@ --# RUN: llc -start-after=livedebugvalues -filetype=obj -o - %s \ -+# RUN: llc -emit-call-site-info -start-after=livedebugvalues -filetype=obj -o - %s \ - # RUN: | llvm-dwarfdump -v - | FileCheck %s - - # This tests for a crash in DwarfDebug's singular DBG_VALUE range promotion when -diff --git a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir -index ce8dc97f0e72..0ae4e6ec485c 100644 ---- a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir -+++ b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir -@@ -1,4 +1,4 @@ --# RUN: llc -mtriple=arm-linux-gnueabi -debug-entry-values -filetype=obj -start-after=machineverifier %s -o -| llvm-dwarfdump -| FileCheck %s -+# RUN: llc -emit-call-site-info -mtriple=arm-linux-gnueabi -debug-entry-values -filetype=obj -start-after=machineverifier %s -o -| llvm-dwarfdump -| FileCheck %s - # Following code is used for producing this test case. Note that - # some of argument loading instruction are modified in order to - # cover certain cases. -diff --git a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir -index 9001c8ba8eea..5b84d9e9627f 100644 ---- a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir -+++ b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s - - # Based on the following C reproducer: - # -diff --git a/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir b/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir -index aa7b54c1e5bb..11e9c4c90836 100644 ---- a/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir -+++ b/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir -@@ -1,4 +1,4 @@ --# RUN: llc -mtriple=arm-linux-gnu -debug-entry-values -run-pass if-converter %s -o -| FileCheck %s -+# RUN: llc -emit-call-site-info -mtriple=arm-linux-gnu -debug-entry-values -run-pass if-converter %s -o -| FileCheck %s - - # Vefify that the call site info will be updated after the optimization. - # This test case would previously trigger an assertion when -diff --git a/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir b/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir -index 8ae628af2c09..3ae23d4189bf 100644 ---- a/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir -+++ b/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir -@@ -1,4 +1,4 @@ --# RUN: llc -mtriple hexagon -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s -+# RUN: llc -mtriple hexagon -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s - - # Based on the following C reproducer: - # -diff --git a/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir b/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir -index ff0a539dd15d..8bb0b3202acd 100644 ---- a/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir -+++ b/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s - - # Verify that the entry values for the input parameters are inserted after the - # bundles which contains the registers' clobbering instructions (the calls to -diff --git a/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir b/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir -index 8a4e8b5632c2..3cf41467f7f9 100644 ---- a/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir -+++ b/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -start-after=livedebugvalues -o - %s | FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -o - %s | FileCheck %s - - # This test would previously trigger an assertion when trying to describe the - # call site value for callee()'s float parameter. -diff --git a/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir b/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir -index e6fe5d2de878..4e5a07321d42 100644 ---- a/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir -+++ b/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -start-before=livedebugvalues -mtriple=x86_64-apple-darwin -o %t %s -filetype=obj -+# RUN: llc -emit-call-site-info -debug-entry-values -start-before=livedebugvalues -mtriple=x86_64-apple-darwin -o %t %s -filetype=obj - # RUN: llvm-dwarfdump %t | FileCheck %s - # - # int global; -diff --git a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir -index c32a1155d038..edeef2c7aed4 100644 ---- a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir -+++ b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir -@@ -1,6 +1,6 @@ - # Check that llvm can describe a call site parameter which resides in a spill slot. - # --# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s - # - # Command: - # $ ~/src/builds/llvm-project-master-RA/bin/clang -g -Xclang -femit-debug-entry-values -O2 -c -o spill.o spill.cc -mllvm -stop-before=machineverifier -o spill.mir -diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir -index a2d51a203512..01a2b887a60b 100644 ---- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir -+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s - - # Based on the following reproducer: - # -diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir -index f9e9459f1abd..104bc0146798 100644 ---- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir -+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s - # - # CHECK: DW_TAG_GNU_call_site - # CHECK-NEXT: DW_AT_abstract_origin {{.*}} "foo" -diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir -index 1bb70f6d4530..4d88fa9aab74 100644 ---- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir -+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s - # CHECK: DW_TAG_GNU_call_site - # CHECK-NEXT: DW_AT_abstract_origin {{.*}} "foo") - # CHECK-NEXT: DW_AT_low_pc {{.*}} -diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir -index 235787573f51..81af598ba194 100644 ---- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir -+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -start-before=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -start-before=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s - - # Based on the following C++ code: - # struct A { A(A &) {} }; -diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir -index db0934c595c3..46adedd1be44 100644 ---- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir -+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir -@@ -1,4 +1,4 @@ --# RUN: llc -O1 -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s -+# RUN: llc -O1 -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s - - # Based on the following C reproducer: - # -diff --git a/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir b/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir -index 5d203029936e..1d7b64f169d1 100644 ---- a/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir -+++ b/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s - # - #extern void fn2(int); - # -diff --git a/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir b/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir -index e79be66cd4e3..c39bc4db50be 100644 ---- a/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir -+++ b/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir -@@ -2,8 +2,8 @@ - # When the debugger tuning is set to gdb, use GNU opcodes. - # For lldb, use the standard DWARF5 opcodes. - --# RUN: llc -debug-entry-values -debugger-tune=gdb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-GNU --# RUN: llc -debug-entry-values -debugger-tune=lldb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-DWARF5 -+# RUN: llc -emit-call-site-info -debug-entry-values -debugger-tune=gdb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-GNU -+# RUN: llc -emit-call-site-info -debug-entry-values -debugger-tune=lldb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-DWARF5 - # - # extern void foo(int *a, int b, int c, int d, int e, int f); - # extern int getVal(); -diff --git a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir -index 8d121c3a30b9..c7f15aaaa562 100644 ---- a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir -+++ b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s - # - #extern void fn1 (int, int, int); - # -diff --git a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir -index 2396daada876..aa8fdd7afd47 100644 ---- a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir -+++ b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s - # - # The test case was artificially adjusted, in order to make proper diamond basic - # block structure relevant to the debug entry values propagation. -diff --git a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir -index 86b1cddaa462..c5af863954bf 100644 ---- a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir -+++ b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir -@@ -1,4 +1,4 @@ --# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s -+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s - # - #extern void fn1 (int, int, int); - #__attribute__((noinline)) -diff --git a/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir b/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir -index d282d796f6d7..ea9c12b5a192 100644 ---- a/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir -+++ b/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir -@@ -1,4 +1,4 @@ --# RUN: llc -mtriple=x86_64-pc-linux -debug-entry-values -run-pass=unreachable-mbb-elimination -o - %s | FileCheck %s -+# RUN: llc -mtriple=x86_64-pc-linux -emit-call-site-info -debug-entry-values -run-pass=unreachable-mbb-elimination -o - %s | FileCheck %s - - # Verify that the call site information for the call residing in the eliminated - # block is removed. This test case would previously trigger an assertion when -diff --git a/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll b/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll -index b698f1cdbfe8..b8cd9574cc63 100644 ---- a/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll -+++ b/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll -@@ -1,4 +1,4 @@ --; RUN: llc -O1 -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s -+; RUN: llc -O1 -emit-call-site-info -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s - - ; Verify that the 64-bit call site immediates are not truncated. - ; -diff --git a/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll b/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll -index 9fe67f82a2b4..5d37774f55d6 100644 ---- a/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll -+++ b/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll -@@ -1,4 +1,4 @@ --; RUN: llc -O3 -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s -+; RUN: llc -O3 -emit-call-site-info -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s - - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - target triple = "x86_64-unknown-linux-gnu" -diff --git a/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll b/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll -index c304e9d768a5..d126757398ff 100644 ---- a/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll -+++ b/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll -@@ -1,4 +1,4 @@ --; RUN: llc -debug-entry-values %s -o - -filetype=obj \ -+; RUN: llc -emit-call-site-info -debug-entry-values %s -o - -filetype=obj \ - ; RUN: | llvm-dwarfdump -statistics - | FileCheck %s - ; - ; The LLVM IR file was generated on this source code by using --- -2.33.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch deleted file mode 100644 index 6b2db931..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch +++ /dev/null @@ -1,2243 +0,0 @@ -From fd7146658e3b4ce045dfb332c2edf216f76c1e1f Mon Sep 17 00:00:00 2001 -From: serge-sans-paille -Date: Wed, 4 Mar 2020 00:47:43 +0100 -Subject: [PATCH] Replace MCTargetOptionsCommandFlags.inc and CommandFlags.inc - by runtime registration - -MCTargetOptionsCommandFlags.inc and CommandFlags.inc are headers which contain -cl::opt with static storage. -These headers are meant to be incuded by tools to make it easier to parametrize -codegen/mc. - -However, these headers are also included in at least two libraries: lldCommon -and handle-llvm. As a result, when creating DYLIB, clang-cpp holds a reference -to the options, and lldCommon holds another reference. Linking the two in a -single executable, as zig does[0], results in a double registration. - -This patch explores an other approach: the .inc files are moved to regular -files, and the registration happens on-demand through static declaration of -options in the constructor of a static object. - -[0] https://bugzilla.redhat.com/show_bug.cgi?id=1756977#c5 - -Differential Revision: https://reviews.llvm.org/D75579 - -Upstream-Status: Backport -Signed-off-by: Anuj Mittal ---- - .../clang-fuzzer/handle-llvm/handle_llvm.cpp | 32 +- - lld/Common/TargetOptionsCommandFlags.cpp | 25 +- - llvm/include/llvm/CodeGen/CommandFlags.h | 149 +++++ - .../llvm/MC/MCTargetOptionsCommandFlags.h | 54 ++ - .../llvm/MC/MCTargetOptionsCommandFlags.inc | 65 -- - llvm/include/llvm/module.modulemap | 3 - - llvm/lib/CodeGen/CMakeLists.txt | 1 + - llvm/lib/CodeGen/CommandFlags.cpp | 588 ++++++++++++++++++ - llvm/lib/MC/CMakeLists.txt | 1 + - llvm/lib/MC/MCTargetOptionsCommandFlags.cpp | 105 ++++ - llvm/tools/dsymutil/DwarfStreamer.cpp | 7 +- - llvm/tools/gold/gold-plugin.cpp | 16 +- - llvm/tools/llc/CMakeLists.txt | 1 + - llvm/tools/llc/llc.cpp | 43 +- - llvm/tools/lli/CMakeLists.txt | 1 + - llvm/tools/lli/lli.cpp | 41 +- - llvm/tools/llvm-dwp/llvm-dwp.cpp | 6 +- - .../llvm-isel-fuzzer/llvm-isel-fuzzer.cpp | 17 +- - llvm/tools/llvm-lto/CMakeLists.txt | 5 +- - llvm/tools/llvm-lto/llvm-lto.cpp | 28 +- - llvm/tools/llvm-lto2/CMakeLists.txt | 1 + - llvm/tools/llvm-lto2/llvm-lto2.cpp | 20 +- - .../llvm-mc-assemble-fuzzer/CMakeLists.txt | 1 + - .../llvm-mc-assemble-fuzzer.cpp | 12 +- - llvm/tools/llvm-mc/CMakeLists.txt | 1 + - llvm/tools/llvm-mc/llvm-mc.cpp | 6 +- - llvm/tools/llvm-mca/llvm-mca.cpp | 8 +- - .../tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp | 16 +- - llvm/tools/lto/CMakeLists.txt | 4 +- - llvm/tools/lto/lto.cpp | 35 +- - llvm/tools/opt/opt.cpp | 23 +- - .../DebugInfo/DWARF/DwarfGenerator.cpp | 6 +- - 32 files changed, 1088 insertions(+), 233 deletions(-) - create mode 100644 llvm/include/llvm/CodeGen/CommandFlags.h - create mode 100644 llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h - delete mode 100644 llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc - create mode 100644 llvm/lib/CodeGen/CommandFlags.cpp - create mode 100644 llvm/lib/MC/MCTargetOptionsCommandFlags.cpp - -diff --git a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp -index d8ab14594185..aefb761cd70c 100644 ---- a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp -+++ b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp -@@ -19,7 +19,7 @@ - #include "llvm/ADT/Triple.h" - #include "llvm/Analysis/TargetLibraryInfo.h" - #include "llvm/Analysis/TargetTransformInfo.h" --#include "llvm/CodeGen/CommandFlags.inc" -+#include "llvm/CodeGen/CommandFlags.h" - #include "llvm/CodeGen/MachineModuleInfo.h" - #include "llvm/CodeGen/TargetPassConfig.h" - #include "llvm/ExecutionEngine/JITEventListener.h" -@@ -29,9 +29,9 @@ - #include "llvm/ExecutionEngine/RTDyldMemoryManager.h" - #include "llvm/ExecutionEngine/SectionMemoryManager.h" - #include "llvm/IR/IRPrintingPasses.h" -+#include "llvm/IR/LLVMContext.h" - #include "llvm/IR/LegacyPassManager.h" - #include "llvm/IR/LegacyPassNameParser.h" --#include "llvm/IR/LLVMContext.h" - #include "llvm/IR/Module.h" - #include "llvm/IR/Verifier.h" - #include "llvm/IRReader/IRReader.h" -@@ -42,12 +42,14 @@ - #include "llvm/Support/TargetRegistry.h" - #include "llvm/Support/TargetSelect.h" - #include "llvm/Target/TargetMachine.h" --#include "llvm/Transforms/IPO/PassManagerBuilder.h" - #include "llvm/Transforms/IPO.h" -+#include "llvm/Transforms/IPO/PassManagerBuilder.h" - #include "llvm/Transforms/Vectorize.h" - - using namespace llvm; - -+static codegen::RegisterCodeGenFlags CGF; -+ - // Define a type for the functions that are compiled and executed - typedef void (*LLVMFunc)(int*, int*, int*, int); - -@@ -100,15 +102,17 @@ static std::string OptLLVM(const std::string &IR, CodeGenOpt::Level OLvl) { - ErrorAndExit("Could not parse IR"); - - Triple ModuleTriple(M->getTargetTriple()); -- const TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ const TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - std::string E; -- const Target *TheTarget = TargetRegistry::lookupTarget(MArch, ModuleTriple, E); -- TargetMachine *Machine = -- TheTarget->createTargetMachine(M->getTargetTriple(), getCPUStr(), -- getFeaturesStr(), Options, getRelocModel(), -- getCodeModel(), OLvl); -+ const Target *TheTarget = -+ TargetRegistry::lookupTarget(codegen::getMArch(), ModuleTriple, E); -+ TargetMachine *Machine = TheTarget->createTargetMachine( -+ M->getTargetTriple(), codegen::getCPUStr(), codegen::getFeaturesStr(), -+ Options, codegen::getExplicitRelocModel(), -+ codegen::getExplicitCodeModel(), OLvl); - std::unique_ptr TM(Machine); -- setFunctionAttributes(getCPUStr(), getFeaturesStr(), *M); -+ codegen::setFunctionAttributes(codegen::getCPUStr(), -+ codegen::getFeaturesStr(), *M); - - legacy::PassManager Passes; - -@@ -154,14 +158,14 @@ static void CreateAndRunJITFunc(const std::string &IR, CodeGenOpt::Level OLvl) { - - std::string ErrorMsg; - EngineBuilder builder(std::move(M)); -- builder.setMArch(MArch); -- builder.setMCPU(getCPUStr()); -- builder.setMAttrs(getFeatureList()); -+ builder.setMArch(codegen::getMArch()); -+ builder.setMCPU(codegen::getCPUStr()); -+ builder.setMAttrs(codegen::getFeatureList()); - builder.setErrorStr(&ErrorMsg); - builder.setEngineKind(EngineKind::JIT); - builder.setMCJITMemoryManager(std::make_unique()); - builder.setOptLevel(OLvl); -- builder.setTargetOptions(InitTargetOptionsFromCodeGenFlags()); -+ builder.setTargetOptions(codegen::InitTargetOptionsFromCodeGenFlags()); - - std::unique_ptr EE(builder.create()); - if (!EE) -diff --git a/lld/Common/TargetOptionsCommandFlags.cpp b/lld/Common/TargetOptionsCommandFlags.cpp -index 0137feb63f37..9b166a3e130a 100644 ---- a/lld/Common/TargetOptionsCommandFlags.cpp -+++ b/lld/Common/TargetOptionsCommandFlags.cpp -@@ -5,35 +5,26 @@ - // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - // - //===----------------------------------------------------------------------===// --// --// This file exists as a place for global variables defined in LLVM's --// CodeGen/CommandFlags.inc. By putting the resulting object file in --// an archive and linking with it, the definitions will automatically be --// included when needed and skipped when already present. --// --//===----------------------------------------------------------------------===// - - #include "lld/Common/TargetOptionsCommandFlags.h" - --#include "llvm/CodeGen/CommandFlags.inc" -+#include "llvm/CodeGen/CommandFlags.h" - #include "llvm/Target/TargetOptions.h" - --// Define an externally visible version of --// initTargetOptionsFromCodeGenFlags, so that its functionality can be --// used without having to include llvm/CodeGen/CommandFlags.inc, which --// would lead to multiple definitions of the command line flags. -+static llvm::codegen::RegisterCodeGenFlags CGF; -+ - llvm::TargetOptions lld::initTargetOptionsFromCodeGenFlags() { -- return ::InitTargetOptionsFromCodeGenFlags(); -+ return llvm::codegen::InitTargetOptionsFromCodeGenFlags(); - } - - llvm::Optional lld::getRelocModelFromCMModel() { -- return getRelocModel(); -+ return llvm::codegen::getExplicitRelocModel(); - } - - llvm::Optional lld::getCodeModelFromCMModel() { -- return getCodeModel(); -+ return llvm::codegen::getExplicitCodeModel(); - } - --std::string lld::getCPUStr() { return ::getCPUStr(); } -+std::string lld::getCPUStr() { return llvm::codegen::getCPUStr(); } - --std::vector lld::getMAttrs() { return ::MAttrs; } -+std::vector lld::getMAttrs() { return llvm::codegen::getMAttrs(); } -diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h -new file mode 100644 -index 000000000000..cdec26879f73 ---- /dev/null -+++ b/llvm/include/llvm/CodeGen/CommandFlags.h -@@ -0,0 +1,149 @@ -+//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+// -+// This file contains codegen-specific flags that are shared between different -+// command line tools. The tools "llc" and "opt" both use this file to prevent -+// flag duplication. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "llvm/ADT/StringExtras.h" -+#include "llvm/IR/Instructions.h" -+#include "llvm/IR/Intrinsics.h" -+#include "llvm/IR/Module.h" -+#include "llvm/MC/MCTargetOptionsCommandFlags.h" -+#include "llvm/MC/SubtargetFeature.h" -+#include "llvm/Support/CodeGen.h" -+#include "llvm/Support/CommandLine.h" -+#include "llvm/Support/Host.h" -+#include "llvm/Target/TargetMachine.h" -+#include "llvm/Target/TargetOptions.h" -+#include -+ -+namespace llvm { -+ -+namespace codegen { -+ -+std::string getMArch(); -+ -+std::string getMCPU(); -+ -+std::vector getMAttrs(); -+ -+Reloc::Model getRelocModel(); -+Optional getExplicitRelocModel(); -+ -+ThreadModel::Model getThreadModel(); -+ -+CodeModel::Model getCodeModel(); -+Optional getExplicitCodeModel(); -+ -+llvm::ExceptionHandling getExceptionModel(); -+ -+CodeGenFileType getFileType(); -+Optional getExplicitFileType(); -+ -+CodeGenFileType getFileType(); -+ -+llvm::FramePointer::FP getFramePointerUsage(); -+ -+bool getEnableUnsafeFPMath(); -+ -+bool getEnableNoInfsFPMath(); -+ -+bool getEnableNoNaNsFPMath(); -+ -+bool getEnableNoSignedZerosFPMath(); -+ -+bool getEnableNoTrappingFPMath(); -+ -+llvm::FPDenormal::DenormalMode getDenormalFPMath(); -+ -+bool getEnableHonorSignDependentRoundingFPMath(); -+ -+llvm::FloatABI::ABIType getFloatABIForCalls(); -+ -+llvm::FPOpFusion::FPOpFusionMode getFuseFPOps(); -+ -+bool getDontPlaceZerosInBSS(); -+ -+bool getEnableGuaranteedTailCallOpt(); -+ -+bool getDisableTailCalls(); -+ -+bool getStackSymbolOrdering(); -+ -+unsigned getOverrideStackAlignment(); -+ -+bool getStackRealign(); -+ -+std::string getTrapFuncName(); -+ -+bool getUseCtors(); -+ -+bool getRelaxELFRelocations(); -+ -+bool getDataSections(); -+Optional getExplicitDataSections(); -+ -+bool getFunctionSections(); -+Optional getExplicitFunctionSections(); -+ -+std::string getBBSections(); -+ -+unsigned getTLSSize(); -+ -+bool getEmulatedTLS(); -+ -+bool getUniqueSectionNames(); -+ -+bool getUniqueBBSectionNames(); -+ -+llvm::EABI getEABIVersion(); -+ -+llvm::DebuggerKind getDebuggerTuningOpt(); -+ -+bool getEnableStackSizeSection(); -+ -+bool getEnableAddrsig(); -+ -+bool getEmitCallSiteInfo(); -+ -+bool getEnableDebugEntryValues(); -+ -+bool getForceDwarfFrameSection(); -+ -+/// Create this object with static storage to register codegen-related command -+/// line options. -+struct RegisterCodeGenFlags { -+ RegisterCodeGenFlags(); -+}; -+ -+llvm::BasicBlockSection getBBSectionsMode(llvm::TargetOptions &Options); -+ -+// Common utility function tightly tied to the options listed here. Initializes -+// a TargetOptions object with CodeGen flags and returns it. -+TargetOptions InitTargetOptionsFromCodeGenFlags(); -+ -+std::string getCPUStr(); -+ -+std::string getFeaturesStr(); -+ -+std::vector getFeatureList(); -+ -+void renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val); -+ -+/// Set function attributes of function \p F based on CPU, Features, and command -+/// line flags. -+void setFunctionAttributes(StringRef CPU, StringRef Features, Function &F); -+ -+/// Set function attributes of functions in Module M based on CPU, -+/// Features, and command line flags. -+void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M); -+} // namespace codegen -+} // namespace llvm -diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h -new file mode 100644 -index 000000000000..7a5edf78fdcc ---- /dev/null -+++ b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h -@@ -0,0 +1,54 @@ -+//===-- MCTargetOptionsCommandFlags.h --------------------------*- C++ -*-===// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+// -+// This file contains machine code-specific flags that are shared between -+// different command line tools. -+// -+//===----------------------------------------------------------------------===// -+ -+#ifndef LLVM_MC_MCTARGETOPTIONSCOMMANDFLAGS_H -+#define LLVM_MC_MCTARGETOPTIONSCOMMANDFLAGS_H -+ -+#include "llvm/ADT/Optional.h" -+#include "llvm/MC/MCTargetOptions.h" -+#include "llvm/Support/CommandLine.h" -+ -+namespace llvm { -+ -+namespace mc { -+ -+bool getRelaxAll(); -+Optional getExplicitRelaxAll(); -+ -+bool getIncrementalLinkerCompatible(); -+ -+int getDwarfVersion(); -+ -+bool getShowMCInst(); -+ -+bool getFatalWarnings(); -+ -+bool getNoWarn(); -+ -+bool getNoDeprecatedWarn(); -+ -+std::string getABIName(); -+ -+/// Create this object with static storage to register mc-related command -+/// line options. -+struct RegisterMCTargetOptionsFlags { -+ RegisterMCTargetOptionsFlags(); -+}; -+ -+MCTargetOptions InitMCTargetOptionsFromFlags(); -+ -+} // namespace mc -+ -+} // namespace llvm -+ -+#endif -diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc -deleted file mode 100644 -index 93e21b626eac..000000000000 ---- a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc -+++ /dev/null -@@ -1,65 +0,0 @@ --//===-- MCTargetOptionsCommandFlags.h --------------------------*- C++ -*-===// --// --// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. --// See https://llvm.org/LICENSE.txt for license information. --// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception --// --//===----------------------------------------------------------------------===// --// --// This file contains machine code-specific flags that are shared between --// different command line tools. --// --//===----------------------------------------------------------------------===// -- --#ifndef LLVM_MC_MCTARGETOPTIONSCOMMANDFLAGS_H --#define LLVM_MC_MCTARGETOPTIONSCOMMANDFLAGS_H -- --#include "llvm/MC/MCTargetOptions.h" --#include "llvm/Support/CommandLine.h" --using namespace llvm; -- --static cl::opt RelaxAll("mc-relax-all", -- cl::desc("When used with filetype=obj, " -- "relax all fixups in the emitted object file")); -- --static cl::opt IncrementalLinkerCompatible( -- "incremental-linker-compatible", -- cl::desc( -- "When used with filetype=obj, " -- "emit an object file which can be used with an incremental linker")); -- --static cl::opt DwarfVersion("dwarf-version", cl::desc("Dwarf version"), -- cl::init(0)); -- --static cl::opt ShowMCInst("asm-show-inst", -- cl::desc("Emit internal instruction representation to " -- "assembly file")); -- --static cl::opt FatalWarnings("fatal-warnings", -- cl::desc("Treat warnings as errors")); -- --static cl::opt NoWarn("no-warn", cl::desc("Suppress all warnings")); --static cl::alias NoWarnW("W", cl::desc("Alias for --no-warn"), cl::aliasopt(NoWarn)); -- --static cl::opt NoDeprecatedWarn("no-deprecated-warn", -- cl::desc("Suppress all deprecated warnings")); -- --static cl::opt --ABIName("target-abi", cl::Hidden, -- cl::desc("The name of the ABI to be targeted from the backend."), -- cl::init("")); -- --static MCTargetOptions InitMCTargetOptionsFromFlags() { -- MCTargetOptions Options; -- Options.MCRelaxAll = RelaxAll; -- Options.MCIncrementalLinkerCompatible = IncrementalLinkerCompatible; -- Options.DwarfVersion = DwarfVersion; -- Options.ShowMCInst = ShowMCInst; -- Options.ABIName = ABIName; -- Options.MCFatalWarnings = FatalWarnings; -- Options.MCNoWarn = NoWarn; -- Options.MCNoDeprecatedWarn = NoDeprecatedWarn; -- return Options; --} -- --#endif -diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap -index d281682ae003..d176b3dfd4be 100644 ---- a/llvm/include/llvm/module.modulemap -+++ b/llvm/include/llvm/module.modulemap -@@ -29,7 +29,6 @@ module LLVM_Backend { - exclude header "CodeGen/LinkAllCodegenComponents.h" - - // These are intended for (repeated) textual inclusion. -- textual header "CodeGen/CommandFlags.inc" - textual header "CodeGen/DIEValue.def" - } - } -@@ -308,8 +307,6 @@ module LLVM_MC { - - umbrella "MC" - module * { export * } -- -- textual header "MC/MCTargetOptionsCommandFlags.inc" - } - - // Used by llvm-tblgen -diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt -index a3916b7c6242..c6be91247017 100644 ---- a/llvm/lib/CodeGen/CMakeLists.txt -+++ b/llvm/lib/CodeGen/CMakeLists.txt -@@ -14,6 +14,7 @@ add_llvm_component_library(LLVMCodeGen - CFIInstrInserter.cpp - CodeGen.cpp - CodeGenPrepare.cpp -+ CommandFlags.cpp - CriticalAntiDepBreaker.cpp - DeadMachineInstructionElim.cpp - DetectDeadLanes.cpp -diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp -new file mode 100644 -index 000000000000..7acb84df582f ---- /dev/null -+++ b/llvm/lib/CodeGen/CommandFlags.cpp -@@ -0,0 +1,588 @@ -+//===-- CommandFlags.cpp - Command Line Flags Interface ---------*- C++ -*-===// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+// -+// This file contains codegen-specific flags that are shared between different -+// command line tools. The tools "llc" and "opt" both use this file to prevent -+// flag duplication. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "llvm/CodeGen/CommandFlags.h" -+ -+using namespace llvm; -+ -+#define CGOPT(TY, NAME) \ -+ static cl::opt *NAME##View; \ -+ TY codegen::get##NAME() { \ -+ assert(NAME##View && "RegisterCodeGenFlags not created."); \ -+ return *NAME##View; \ -+ } -+ -+#define CGLIST(TY, NAME) \ -+ static cl::list *NAME##View; \ -+ std::vector codegen::get##NAME() { \ -+ assert(NAME##View && "RegisterCodeGenFlags not created."); \ -+ return *NAME##View; \ -+ } -+ -+#define CGOPT_EXP(TY, NAME) \ -+ CGOPT(TY, NAME) \ -+ Optional codegen::getExplicit##NAME() { \ -+ if (NAME##View->getNumOccurrences()) { \ -+ TY res = *NAME##View; \ -+ return res; \ -+ } \ -+ return None; \ -+ } -+ -+CGOPT(std::string, MArch) -+CGOPT(std::string, MCPU) -+CGLIST(std::string, MAttrs) -+CGOPT_EXP(Reloc::Model, RelocModel) -+CGOPT(ThreadModel::Model, ThreadModel) -+CGOPT_EXP(CodeModel::Model, CodeModel) -+CGOPT(ExceptionHandling, ExceptionModel) -+CGOPT_EXP(CodeGenFileType, FileType) -+CGOPT(FramePointer::FP, FramePointerUsage) -+CGOPT(bool, EnableUnsafeFPMath) -+CGOPT(bool, EnableNoInfsFPMath) -+CGOPT(bool, EnableNoNaNsFPMath) -+CGOPT(bool, EnableNoSignedZerosFPMath) -+CGOPT(bool, EnableNoTrappingFPMath) -+CGOPT(FPDenormal::DenormalMode, DenormalFPMath) -+CGOPT(bool, EnableHonorSignDependentRoundingFPMath) -+CGOPT(FloatABI::ABIType, FloatABIForCalls) -+CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps) -+CGOPT(bool, DontPlaceZerosInBSS) -+CGOPT(bool, EnableGuaranteedTailCallOpt) -+CGOPT(bool, DisableTailCalls) -+CGOPT(bool, StackSymbolOrdering) -+CGOPT(unsigned, OverrideStackAlignment) -+CGOPT(bool, StackRealign) -+CGOPT(std::string, TrapFuncName) -+CGOPT(bool, UseCtors) -+CGOPT(bool, RelaxELFRelocations) -+CGOPT_EXP(bool, DataSections) -+CGOPT_EXP(bool, FunctionSections) -+CGOPT(std::string, BBSections) -+CGOPT(unsigned, TLSSize) -+CGOPT(bool, EmulatedTLS) -+CGOPT(bool, UniqueSectionNames) -+CGOPT(bool, UniqueBBSectionNames) -+CGOPT(EABI, EABIVersion) -+CGOPT(DebuggerKind, DebuggerTuningOpt) -+CGOPT(bool, EnableStackSizeSection) -+CGOPT(bool, EnableAddrsig) -+CGOPT(bool, EmitCallSiteInfo) -+CGOPT(bool, EnableDebugEntryValues) -+CGOPT(bool, ForceDwarfFrameSection) -+ -+codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { -+#define CGBINDOPT(NAME) \ -+ do { \ -+ NAME##View = std::addressof(NAME); \ -+ } while (0) -+ -+ static cl::opt MArch( -+ "march", cl::desc("Architecture to generate code for (see --version)")); -+ CGBINDOPT(MArch); -+ -+ static cl::opt MCPU( -+ "mcpu", cl::desc("Target a specific cpu type (-mcpu=help for details)"), -+ cl::value_desc("cpu-name"), cl::init("")); -+ CGBINDOPT(MCPU); -+ -+ static cl::list MAttrs( -+ "mattr", cl::CommaSeparated, -+ cl::desc("Target specific attributes (-mattr=help for details)"), -+ cl::value_desc("a1,+a2,-a3,...")); -+ CGBINDOPT(MAttrs); -+ -+ static cl::opt RelocModel( -+ "relocation-model", cl::desc("Choose relocation model"), -+ cl::values( -+ clEnumValN(Reloc::Static, "static", "Non-relocatable code"), -+ clEnumValN(Reloc::PIC_, "pic", -+ "Fully relocatable, position independent code"), -+ clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", -+ "Relocatable external references, non-relocatable code"), -+ clEnumValN( -+ Reloc::ROPI, "ropi", -+ "Code and read-only data relocatable, accessed PC-relative"), -+ clEnumValN( -+ Reloc::RWPI, "rwpi", -+ "Read-write data relocatable, accessed relative to static base"), -+ clEnumValN(Reloc::ROPI_RWPI, "ropi-rwpi", -+ "Combination of ropi and rwpi"))); -+ CGBINDOPT(RelocModel); -+ -+ static cl::opt ThreadModel( -+ "thread-model", cl::desc("Choose threading model"), -+ cl::init(ThreadModel::POSIX), -+ cl::values( -+ clEnumValN(ThreadModel::POSIX, "posix", "POSIX thread model"), -+ clEnumValN(ThreadModel::Single, "single", "Single thread model"))); -+ CGBINDOPT(ThreadModel); -+ -+ static cl::opt CodeModel( -+ "code-model", cl::desc("Choose code model"), -+ cl::values(clEnumValN(CodeModel::Tiny, "tiny", "Tiny code model"), -+ clEnumValN(CodeModel::Small, "small", "Small code model"), -+ clEnumValN(CodeModel::Kernel, "kernel", "Kernel code model"), -+ clEnumValN(CodeModel::Medium, "medium", "Medium code model"), -+ clEnumValN(CodeModel::Large, "large", "Large code model"))); -+ CGBINDOPT(CodeModel); -+ -+ static cl::opt ExceptionModel( -+ "exception-model", cl::desc("exception model"), -+ cl::init(ExceptionHandling::None), -+ cl::values( -+ clEnumValN(ExceptionHandling::None, "default", -+ "default exception handling model"), -+ clEnumValN(ExceptionHandling::DwarfCFI, "dwarf", -+ "DWARF-like CFI based exception handling"), -+ clEnumValN(ExceptionHandling::SjLj, "sjlj", -+ "SjLj exception handling"), -+ clEnumValN(ExceptionHandling::ARM, "arm", "ARM EHABI exceptions"), -+ clEnumValN(ExceptionHandling::WinEH, "wineh", -+ "Windows exception model"), -+ clEnumValN(ExceptionHandling::Wasm, "wasm", -+ "WebAssembly exception handling"))); -+ CGBINDOPT(ExceptionModel); -+ -+ static cl::opt FileType( -+ "filetype", cl::init(CGFT_AssemblyFile), -+ cl::desc( -+ "Choose a file type (not all types are supported by all targets):"), -+ cl::values( -+ clEnumValN(CGFT_AssemblyFile, "asm", "Emit an assembly ('.s') file"), -+ clEnumValN(CGFT_ObjectFile, "obj", -+ "Emit a native object ('.o') file"), -+ clEnumValN(CGFT_Null, "null", -+ "Emit nothing, for performance testing"))); -+ CGBINDOPT(FileType); -+ -+ static cl::opt FramePointerUsage( -+ "frame-pointer", -+ cl::desc("Specify frame pointer elimination optimization"), -+ cl::init(FramePointer::None), -+ cl::values( -+ clEnumValN(FramePointer::All, "all", -+ "Disable frame pointer elimination"), -+ clEnumValN(FramePointer::NonLeaf, "non-leaf", -+ "Disable frame pointer elimination for non-leaf frame"), -+ clEnumValN(FramePointer::None, "none", -+ "Enable frame pointer elimination"))); -+ CGBINDOPT(FramePointerUsage); -+ -+ static cl::opt EnableUnsafeFPMath( -+ "enable-unsafe-fp-math", -+ cl::desc("Enable optimizations that may decrease FP precision"), -+ cl::init(false)); -+ CGBINDOPT(EnableUnsafeFPMath); -+ -+ static cl::opt EnableNoInfsFPMath( -+ "enable-no-infs-fp-math", -+ cl::desc("Enable FP math optimizations that assume no +-Infs"), -+ cl::init(false)); -+ CGBINDOPT(EnableNoInfsFPMath); -+ -+ static cl::opt EnableNoNaNsFPMath( -+ "enable-no-nans-fp-math", -+ cl::desc("Enable FP math optimizations that assume no NaNs"), -+ cl::init(false)); -+ CGBINDOPT(EnableNoNaNsFPMath); -+ -+ static cl::opt EnableNoSignedZerosFPMath( -+ "enable-no-signed-zeros-fp-math", -+ cl::desc("Enable FP math optimizations that assume " -+ "the sign of 0 is insignificant"), -+ cl::init(false)); -+ CGBINDOPT(EnableNoSignedZerosFPMath); -+ -+ static cl::opt EnableNoTrappingFPMath( -+ "enable-no-trapping-fp-math", -+ cl::desc("Enable setting the FP exceptions build " -+ "attribute not to use exceptions"), -+ cl::init(false)); -+ CGBINDOPT(EnableNoTrappingFPMath); -+ -+ static cl::opt DenormalFPMath( -+ "denormal-fp-math", -+ cl::desc( -+ "Select which denormal numbers the code is permitted to require"), -+ cl::init(FPDenormal::IEEE), -+ cl::values( -+ clEnumValN(FPDenormal::IEEE, "ieee", "IEEE 754 denormal numbers"), -+ clEnumValN(FPDenormal::PreserveSign, "preserve-sign", -+ "the sign of a flushed-to-zero number is preserved " -+ "in the sign of 0"), -+ clEnumValN(FPDenormal::PositiveZero, "positive-zero", -+ "denormals are flushed to positive zero"))); -+ CGBINDOPT(DenormalFPMath); -+ -+ static cl::opt EnableHonorSignDependentRoundingFPMath( -+ "enable-sign-dependent-rounding-fp-math", cl::Hidden, -+ cl::desc("Force codegen to assume rounding mode can change dynamically"), -+ cl::init(false)); -+ CGBINDOPT(EnableHonorSignDependentRoundingFPMath); -+ -+ static cl::opt FloatABIForCalls( -+ "float-abi", cl::desc("Choose float ABI type"), -+ cl::init(FloatABI::Default), -+ cl::values(clEnumValN(FloatABI::Default, "default", -+ "Target default float ABI type"), -+ clEnumValN(FloatABI::Soft, "soft", -+ "Soft float ABI (implied by -soft-float)"), -+ clEnumValN(FloatABI::Hard, "hard", -+ "Hard float ABI (uses FP registers)"))); -+ CGBINDOPT(FloatABIForCalls); -+ -+ static cl::opt FuseFPOps( -+ "fp-contract", cl::desc("Enable aggressive formation of fused FP ops"), -+ cl::init(FPOpFusion::Standard), -+ cl::values( -+ clEnumValN(FPOpFusion::Fast, "fast", -+ "Fuse FP ops whenever profitable"), -+ clEnumValN(FPOpFusion::Standard, "on", "Only fuse 'blessed' FP ops."), -+ clEnumValN(FPOpFusion::Strict, "off", -+ "Only fuse FP ops when the result won't be affected."))); -+ CGBINDOPT(FuseFPOps); -+ -+ static cl::opt DontPlaceZerosInBSS( -+ "nozero-initialized-in-bss", -+ cl::desc("Don't place zero-initialized symbols into bss section"), -+ cl::init(false)); -+ CGBINDOPT(DontPlaceZerosInBSS); -+ -+ static cl::opt EnableGuaranteedTailCallOpt( -+ "tailcallopt", -+ cl::desc( -+ "Turn fastcc calls into tail calls by (potentially) changing ABI."), -+ cl::init(false)); -+ CGBINDOPT(EnableGuaranteedTailCallOpt); -+ -+ static cl::opt DisableTailCalls( -+ "disable-tail-calls", cl::desc("Never emit tail calls"), cl::init(false)); -+ CGBINDOPT(DisableTailCalls); -+ -+ static cl::opt StackSymbolOrdering( -+ "stack-symbol-ordering", cl::desc("Order local stack symbols."), -+ cl::init(true)); -+ CGBINDOPT(StackSymbolOrdering); -+ -+ static cl::opt OverrideStackAlignment( -+ "stack-alignment", cl::desc("Override default stack alignment"), -+ cl::init(0)); -+ CGBINDOPT(OverrideStackAlignment); -+ -+ static cl::opt StackRealign( -+ "stackrealign", -+ cl::desc("Force align the stack to the minimum alignment"), -+ cl::init(false)); -+ CGBINDOPT(StackRealign); -+ -+ static cl::opt TrapFuncName( -+ "trap-func", cl::Hidden, -+ cl::desc("Emit a call to trap function rather than a trap instruction"), -+ cl::init("")); -+ CGBINDOPT(TrapFuncName); -+ -+ static cl::opt UseCtors("use-ctors", -+ cl::desc("Use .ctors instead of .init_array."), -+ cl::init(false)); -+ CGBINDOPT(UseCtors); -+ -+ static cl::opt RelaxELFRelocations( -+ "relax-elf-relocations", -+ cl::desc( -+ "Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"), -+ cl::init(false)); -+ CGBINDOPT(RelaxELFRelocations); -+ -+ static cl::opt DataSections( -+ "data-sections", cl::desc("Emit data into separate sections"), -+ cl::init(false)); -+ CGBINDOPT(DataSections); -+ -+ static cl::opt FunctionSections( -+ "function-sections", cl::desc("Emit functions into separate sections"), -+ cl::init(false)); -+ CGBINDOPT(FunctionSections); -+ -+ static cl::opt BBSections( -+ "basicblock-sections", -+ cl::desc("Emit basic blocks into separate sections"), -+ cl::value_desc("all | | labels | none"), -+ cl::init("none")); -+ CGBINDOPT(BBSections); -+ -+ static cl::opt TLSSize( -+ "tls-size", cl::desc("Bit size of immediate TLS offsets"), cl::init(0)); -+ CGBINDOPT(TLSSize); -+ -+ static cl::opt EmulatedTLS( -+ "emulated-tls", cl::desc("Use emulated TLS model"), cl::init(false)); -+ CGBINDOPT(EmulatedTLS); -+ -+ static cl::opt UniqueSectionNames( -+ "unique-section-names", cl::desc("Give unique names to every section"), -+ cl::init(true)); -+ CGBINDOPT(UniqueSectionNames); -+ -+ static cl::opt UniqueBBSectionNames( -+ "unique-bb-section-names", -+ cl::desc("Give unique names to every basic block section"), -+ cl::init(false)); -+ CGBINDOPT(UniqueBBSectionNames); -+ -+ static cl::opt EABIVersion( -+ "meabi", cl::desc("Set EABI type (default depends on triple):"), -+ cl::init(EABI::Default), -+ cl::values( -+ clEnumValN(EABI::Default, "default", "Triple default EABI version"), -+ clEnumValN(EABI::EABI4, "4", "EABI version 4"), -+ clEnumValN(EABI::EABI5, "5", "EABI version 5"), -+ clEnumValN(EABI::GNU, "gnu", "EABI GNU"))); -+ CGBINDOPT(EABIVersion); -+ -+ static cl::opt DebuggerTuningOpt( -+ "debugger-tune", cl::desc("Tune debug info for a particular debugger"), -+ cl::init(DebuggerKind::Default), -+ cl::values( -+ clEnumValN(DebuggerKind::GDB, "gdb", "gdb"), -+ clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"), -+ clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. PS4)"))); -+ CGBINDOPT(DebuggerTuningOpt); -+ -+ static cl::opt EnableStackSizeSection( -+ "stack-size-section", -+ cl::desc("Emit a section containing stack size metadata"), -+ cl::init(false)); -+ CGBINDOPT(EnableStackSizeSection); -+ -+ static cl::opt EnableAddrsig( -+ "addrsig", cl::desc("Emit an address-significance table"), -+ cl::init(false)); -+ CGBINDOPT(EnableAddrsig); -+ -+ static cl::opt EmitCallSiteInfo( -+ "emit-call-site-info", -+ cl::desc( -+ "Emit call site debug information, if debug information is enabled."), -+ cl::init(false)); -+ CGBINDOPT(EmitCallSiteInfo); -+ -+ static cl::opt EnableDebugEntryValues( -+ "debug-entry-values", -+ cl::desc("Emit debug info about parameter's entry values"), -+ cl::init(false)); -+ CGBINDOPT(EnableDebugEntryValues); -+ -+ static cl::opt ForceDwarfFrameSection( -+ "force-dwarf-frame-section", -+ cl::desc("Always emit a debug frame section."), cl::init(false)); -+ CGBINDOPT(ForceDwarfFrameSection); -+ -+#undef CGBINDOPT -+ -+ mc::RegisterMCTargetOptionsFlags(); -+} -+ -+llvm::BasicBlockSection -+codegen::getBBSectionsMode(llvm::TargetOptions &Options) { -+ if (getBBSections() == "all") -+ return BasicBlockSection::All; -+ else if (getBBSections() == "labels") -+ return BasicBlockSection::Labels; -+ else if (getBBSections() == "none") -+ return BasicBlockSection::None; -+ else { -+ ErrorOr> MBOrErr = -+ MemoryBuffer::getFile(getBBSections()); -+ if (!MBOrErr) { -+ errs() << "Error loading basic block sections function list file: " -+ << MBOrErr.getError().message() << "\n"; -+ } else { -+ Options.BBSectionsFuncListBuf = std::move(*MBOrErr); -+ } -+ return BasicBlockSection::List; -+ } -+} -+ -+// Common utility function tightly tied to the options listed here. Initializes -+// a TargetOptions object with CodeGen flags and returns it. -+TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() { -+ TargetOptions Options; -+ Options.AllowFPOpFusion = getFuseFPOps(); -+ Options.UnsafeFPMath = getEnableUnsafeFPMath(); -+ Options.NoInfsFPMath = getEnableNoInfsFPMath(); -+ Options.NoNaNsFPMath = getEnableNoNaNsFPMath(); -+ Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath(); -+ Options.NoTrappingFPMath = getEnableNoTrappingFPMath(); -+ Options.FPDenormalMode = getDenormalFPMath(); -+ Options.HonorSignDependentRoundingFPMathOption = -+ getEnableHonorSignDependentRoundingFPMath(); -+ if (getFloatABIForCalls() != FloatABI::Default) -+ Options.FloatABIType = getFloatABIForCalls(); -+ Options.NoZerosInBSS = getDontPlaceZerosInBSS(); -+ Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt(); -+ Options.StackAlignmentOverride = getOverrideStackAlignment(); -+ Options.StackSymbolOrdering = getStackSymbolOrdering(); -+ Options.UseInitArray = !getUseCtors(); -+ Options.RelaxELFRelocations = getRelaxELFRelocations(); -+ Options.DataSections = getDataSections(); -+ Options.FunctionSections = getFunctionSections(); -+ Options.BBSections = getBBSectionsMode(Options); -+ Options.UniqueSectionNames = getUniqueSectionNames(); -+ Options.UniqueBBSectionNames = getUniqueBBSectionNames(); -+ Options.TLSSize = getTLSSize(); -+ Options.EmulatedTLS = getEmulatedTLS(); -+ Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0; -+ Options.ExceptionModel = getExceptionModel(); -+ Options.EmitStackSizeSection = getEnableStackSizeSection(); -+ Options.EmitAddrsig = getEnableAddrsig(); -+ Options.EmitCallSiteInfo = getEmitCallSiteInfo(); -+ Options.EnableDebugEntryValues = getEnableDebugEntryValues(); -+ Options.ForceDwarfFrameSection = getForceDwarfFrameSection(); -+ -+ Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); -+ -+ Options.ThreadModel = getThreadModel(); -+ Options.EABIVersion = getEABIVersion(); -+ Options.DebuggerTuning = getDebuggerTuningOpt(); -+ -+ return Options; -+} -+ -+std::string codegen::getCPUStr() { -+ // If user asked for the 'native' CPU, autodetect here. If autodection fails, -+ // this will set the CPU to an empty string which tells the target to -+ // pick a basic default. -+ if (getMCPU() == "native") -+ return std::string(sys::getHostCPUName()); -+ -+ return getMCPU(); -+} -+ -+std::string codegen::getFeaturesStr() { -+ SubtargetFeatures Features; -+ -+ // If user asked for the 'native' CPU, we need to autodetect features. -+ // This is necessary for x86 where the CPU might not support all the -+ // features the autodetected CPU name lists in the target. For example, -+ // not all Sandybridge processors support AVX. -+ if (getMCPU() == "native") { -+ StringMap HostFeatures; -+ if (sys::getHostCPUFeatures(HostFeatures)) -+ for (auto &F : HostFeatures) -+ Features.AddFeature(F.first(), F.second); -+ } -+ -+ for (auto const &MAttr : getMAttrs()) -+ Features.AddFeature(MAttr); -+ -+ return Features.getString(); -+} -+ -+std::vector codegen::getFeatureList() { -+ SubtargetFeatures Features; -+ -+ // If user asked for the 'native' CPU, we need to autodetect features. -+ // This is necessary for x86 where the CPU might not support all the -+ // features the autodetected CPU name lists in the target. For example, -+ // not all Sandybridge processors support AVX. -+ if (getMCPU() == "native") { -+ StringMap HostFeatures; -+ if (sys::getHostCPUFeatures(HostFeatures)) -+ for (auto &F : HostFeatures) -+ Features.AddFeature(F.first(), F.second); -+ } -+ -+ for (auto const &MAttr : getMAttrs()) -+ Features.AddFeature(MAttr); -+ -+ return Features.getFeatures(); -+} -+ -+void codegen::renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val) { -+ B.addAttribute(Name, Val ? "true" : "false"); -+} -+ -+#define HANDLE_BOOL_ATTR(CL, AttrName) \ -+ do { \ -+ if (CL->getNumOccurrences() > 0 && !F.hasFnAttribute(AttrName)) \ -+ renderBoolStringAttr(NewAttrs, AttrName, *CL); \ -+ } while (0) -+ -+/// Set function attributes of function \p F based on CPU, Features, and command -+/// line flags. -+void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, -+ Function &F) { -+ auto &Ctx = F.getContext(); -+ AttributeList Attrs = F.getAttributes(); -+ AttrBuilder NewAttrs; -+ -+ if (!CPU.empty() && !F.hasFnAttribute("target-cpu")) -+ NewAttrs.addAttribute("target-cpu", CPU); -+ if (!Features.empty()) { -+ // Append the command line features to any that are already on the function. -+ StringRef OldFeatures = -+ F.getFnAttribute("target-features").getValueAsString(); -+ if (OldFeatures.empty()) -+ NewAttrs.addAttribute("target-features", Features); -+ else { -+ SmallString<256> Appended(OldFeatures); -+ Appended.push_back(','); -+ Appended.append(Features); -+ NewAttrs.addAttribute("target-features", Appended); -+ } -+ } -+ if (FramePointerUsageView->getNumOccurrences() > 0 && -+ !F.hasFnAttribute("frame-pointer")) { -+ if (getFramePointerUsage() == FramePointer::All) -+ NewAttrs.addAttribute("frame-pointer", "all"); -+ else if (getFramePointerUsage() == FramePointer::NonLeaf) -+ NewAttrs.addAttribute("frame-pointer", "non-leaf"); -+ else if (getFramePointerUsage() == FramePointer::None) -+ NewAttrs.addAttribute("frame-pointer", "none"); -+ } -+ if (DisableTailCallsView->getNumOccurrences() > 0) -+ NewAttrs.addAttribute("disable-tail-calls", -+ toStringRef(getDisableTailCalls())); -+ if (getStackRealign()) -+ NewAttrs.addAttribute("stackrealign"); -+ -+ HANDLE_BOOL_ATTR(EnableUnsafeFPMathView, "unsafe-fp-math"); -+ HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math"); -+ HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math"); -+ HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math"); -+ -+ if (TrapFuncNameView->getNumOccurrences() > 0) -+ for (auto &B : F) -+ for (auto &I : B) -+ if (auto *Call = dyn_cast(&I)) -+ if (const auto *F = Call->getCalledFunction()) -+ if (F->getIntrinsicID() == Intrinsic::debugtrap || -+ F->getIntrinsicID() == Intrinsic::trap) -+ Call->addAttribute( -+ AttributeList::FunctionIndex, -+ Attribute::get(Ctx, "trap-func-name", getTrapFuncName())); -+ -+ // Let NewAttrs override Attrs. -+ F.setAttributes( -+ Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); -+} -+ -+/// Set function attributes of functions in Module M based on CPU, -+/// Features, and command line flags. -+void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, -+ Module &M) { -+ for (Function &F : M) -+ setFunctionAttributes(CPU, Features, F); -+} -diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt -index de2e47d8d9b2..ab809daf5273 100644 ---- a/llvm/lib/MC/CMakeLists.txt -+++ b/llvm/lib/MC/CMakeLists.txt -@@ -44,6 +44,7 @@ add_llvm_component_library(LLVMMC - MCSymbol.cpp - MCSymbolELF.cpp - MCTargetOptions.cpp -+ MCTargetOptionsCommandFlags.cpp - MCValue.cpp - MCWasmObjectTargetWriter.cpp - MCWasmStreamer.cpp -diff --git a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp -new file mode 100644 -index 000000000000..3ca34061241a ---- /dev/null -+++ b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp -@@ -0,0 +1,105 @@ -+//===-- MCTargetOptionsCommandFlags.cpp --------------------------*- C++ -+//-*-===// -+// -+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -+// See https://llvm.org/LICENSE.txt for license information. -+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -+// -+//===----------------------------------------------------------------------===// -+// -+// This file contains machine code-specific flags that are shared between -+// different command line tools. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "llvm/MC/MCTargetOptionsCommandFlags.h" -+ -+using namespace llvm; -+ -+#define MCOPT(TY, NAME) \ -+ static cl::opt *NAME##View; \ -+ TY llvm::mc::get##NAME() { \ -+ assert(NAME##View && "RegisterMCTargetOptionsFlags not created."); \ -+ return *NAME##View; \ -+ } -+ -+#define MCOPT_EXP(TY, NAME) \ -+ MCOPT(TY, NAME) \ -+ Optional llvm::mc::getExplicit##NAME() { \ -+ if (NAME##View->getNumOccurrences()) { \ -+ TY res = *NAME##View; \ -+ return res; \ -+ } \ -+ return None; \ -+ } -+ -+MCOPT_EXP(bool, RelaxAll) -+MCOPT(bool, IncrementalLinkerCompatible) -+MCOPT(int, DwarfVersion) -+MCOPT(bool, ShowMCInst) -+MCOPT(bool, FatalWarnings) -+MCOPT(bool, NoWarn) -+MCOPT(bool, NoDeprecatedWarn) -+MCOPT(std::string, ABIName) -+ -+llvm::mc::RegisterMCTargetOptionsFlags::RegisterMCTargetOptionsFlags() { -+#define MCBINDOPT(NAME) \ -+ do { \ -+ NAME##View = std::addressof(NAME); \ -+ } while (0) -+ -+ static cl::opt RelaxAll( -+ "mc-relax-all", cl::desc("When used with filetype=obj, relax all fixups " -+ "in the emitted object file")); -+ MCBINDOPT(RelaxAll); -+ -+ static cl::opt IncrementalLinkerCompatible( -+ "incremental-linker-compatible", -+ cl::desc( -+ "When used with filetype=obj, " -+ "emit an object file which can be used with an incremental linker")); -+ MCBINDOPT(IncrementalLinkerCompatible); -+ -+ static cl::opt DwarfVersion("dwarf-version", cl::desc("Dwarf version"), -+ cl::init(0)); -+ MCBINDOPT(DwarfVersion); -+ -+ static cl::opt ShowMCInst( -+ "asm-show-inst", -+ cl::desc("Emit internal instruction representation to assembly file")); -+ MCBINDOPT(ShowMCInst); -+ -+ static cl::opt FatalWarnings("fatal-warnings", -+ cl::desc("Treat warnings as errors")); -+ MCBINDOPT(FatalWarnings); -+ -+ static cl::opt NoWarn("no-warn", cl::desc("Suppress all warnings")); -+ static cl::alias NoWarnW("W", cl::desc("Alias for --no-warn"), -+ cl::aliasopt(NoWarn)); -+ MCBINDOPT(NoWarn); -+ -+ static cl::opt NoDeprecatedWarn( -+ "no-deprecated-warn", cl::desc("Suppress all deprecated warnings")); -+ MCBINDOPT(NoDeprecatedWarn); -+ -+ static cl::opt ABIName( -+ "target-abi", cl::Hidden, -+ cl::desc("The name of the ABI to be targeted from the backend."), -+ cl::init("")); -+ MCBINDOPT(ABIName); -+ -+#undef MCBINDOPT -+} -+ -+MCTargetOptions llvm::mc::InitMCTargetOptionsFromFlags() { -+ MCTargetOptions Options; -+ Options.MCRelaxAll = getRelaxAll(); -+ Options.MCIncrementalLinkerCompatible = getIncrementalLinkerCompatible(); -+ Options.DwarfVersion = getDwarfVersion(); -+ Options.ShowMCInst = getShowMCInst(); -+ Options.ABIName = getABIName(); -+ Options.MCFatalWarnings = getFatalWarnings(); -+ Options.MCNoWarn = getNoWarn(); -+ Options.MCNoDeprecatedWarn = getNoDeprecatedWarn(); -+ return Options; -+} -diff --git a/llvm/tools/dsymutil/DwarfStreamer.cpp b/llvm/tools/dsymutil/DwarfStreamer.cpp -index 3e132c29eada..eb068effbc71 100644 ---- a/llvm/tools/dsymutil/DwarfStreamer.cpp -+++ b/llvm/tools/dsymutil/DwarfStreamer.cpp -@@ -13,13 +13,16 @@ - #include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" - #include "llvm/DebugInfo/DWARF/DWARFContext.h" - #include "llvm/MC/MCTargetOptions.h" --#include "llvm/MC/MCTargetOptionsCommandFlags.inc" -+#include "llvm/MC/MCTargetOptionsCommandFlags.h" - #include "llvm/Support/LEB128.h" - #include "llvm/Support/TargetRegistry.h" - #include "llvm/Target/TargetMachine.h" - #include "llvm/Target/TargetOptions.h" - - namespace llvm { -+ -+static mc::RegisterMCTargetOptionsFlags MOF; -+ - namespace dsymutil { - - /// Retrieve the section named \a SecName in \a Obj. -@@ -61,7 +64,7 @@ bool DwarfStreamer::init(Triple TheTriple) { - if (!MRI) - return error(Twine("no register info for target ") + TripleName, Context); - -- MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); -+ MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); - MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); - if (!MAI) - return error("no asm info for target " + TripleName, Context); -diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp -index 406079dad307..95e72b861197 100644 ---- a/llvm/tools/gold/gold-plugin.cpp -+++ b/llvm/tools/gold/gold-plugin.cpp -@@ -14,7 +14,7 @@ - #include "llvm/ADT/Statistic.h" - #include "llvm/Bitcode/BitcodeReader.h" - #include "llvm/Bitcode/BitcodeWriter.h" --#include "llvm/CodeGen/CommandFlags.inc" -+#include "llvm/CodeGen/CommandFlags.h" - #include "llvm/Config/config.h" // plugin-api.h requires HAVE_STDINT_H - #include "llvm/IR/Constants.h" - #include "llvm/IR/DiagnosticPrinter.h" -@@ -50,6 +50,8 @@ - using namespace llvm; - using namespace lto; - -+static codegen::RegisterCodeGenFlags CodeGenFlags; -+ - // FIXME: Remove when binutils 2.31 (containing gold 1.16) is the minimum - // required version. - typedef enum ld_plugin_status (*ld_plugin_get_wrap_symbols)( -@@ -842,21 +844,21 @@ static std::unique_ptr createLTO(IndexWriteCallback OnIndexWrite, - ThinBackend Backend; - - Conf.CPU = options::mcpu; -- Conf.Options = InitTargetOptionsFromCodeGenFlags(); -+ Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(); - - // Disable the new X86 relax relocations since gold might not support them. - // FIXME: Check the gold version or add a new option to enable them. - Conf.Options.RelaxELFRelocations = false; - - // Toggle function/data sections. -- if (FunctionSections.getNumOccurrences() == 0) -+ if (!codegen::getExplicitFunctionSections()) - Conf.Options.FunctionSections = SplitSections; -- if (DataSections.getNumOccurrences() == 0) -+ if (!codegen::getExplicitDataSections()) - Conf.Options.DataSections = SplitSections; - -- Conf.MAttrs = MAttrs; -- Conf.RelocModel = RelocationModel; -- Conf.CodeModel = getCodeModel(); -+ Conf.MAttrs = codegen::getMAttrs(); -+ Conf.RelocModel = codegen::getExplicitRelocModel(); -+ Conf.CodeModel = codegen::getExplicitCodeModel(); - Conf.CGOptLevel = getCGOptLevel(); - Conf.DisableVerify = options::DisableVerify; - Conf.OptLevel = options::OptLevel; -diff --git a/llvm/tools/llc/CMakeLists.txt b/llvm/tools/llc/CMakeLists.txt -index 880deefa539c..479bc6b55b27 100644 ---- a/llvm/tools/llc/CMakeLists.txt -+++ b/llvm/tools/llc/CMakeLists.txt -@@ -26,4 +26,5 @@ add_llvm_tool(llc - intrinsics_gen - SUPPORT_PLUGINS - ) -+ - export_executable_symbols(llc) -diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp -index b35f8e853c30..4c41ed292fdc 100644 ---- a/llvm/tools/llc/llc.cpp -+++ b/llvm/tools/llc/llc.cpp -@@ -15,7 +15,7 @@ - #include "llvm/ADT/STLExtras.h" - #include "llvm/ADT/Triple.h" - #include "llvm/Analysis/TargetLibraryInfo.h" --#include "llvm/CodeGen/CommandFlags.inc" -+#include "llvm/CodeGen/CommandFlags.h" - #include "llvm/CodeGen/LinkAllAsmWriterComponents.h" - #include "llvm/CodeGen/LinkAllCodegenComponents.h" - #include "llvm/CodeGen/MIRParser/MIRParser.h" -@@ -55,6 +55,8 @@ - #include - using namespace llvm; - -+static codegen::RegisterCodeGenFlags CGF; -+ - // General options for llc. Other pass-specific options are specified - // within the corresponding llc passes, and target-specific options - // and back-end code generation options are specified with the target machine. -@@ -202,7 +204,7 @@ static std::unique_ptr GetOutputStream(const char *TargetName, - else - OutputFilename = IFN; - -- switch (FileType) { -+ switch (codegen::getFileType()) { - case CGFT_AssemblyFile: - if (TargetName[0] == 'c') { - if (TargetName[1] == 0) -@@ -229,7 +231,7 @@ static std::unique_ptr GetOutputStream(const char *TargetName, - - // Decide if we need "binary" output. - bool Binary = false; -- switch (FileType) { -+ switch (codegen::getFileType()) { - case CGFT_AssemblyFile: - break; - case CGFT_ObjectFile: -@@ -395,14 +397,16 @@ static int compileModule(char **argv, LLVMContext &Context) { - std::unique_ptr M; - std::unique_ptr MIR; - Triple TheTriple; -- std::string CPUStr = getCPUStr(), FeaturesStr = getFeaturesStr(); -+ std::string CPUStr = codegen::getCPUStr(), -+ FeaturesStr = codegen::getFeaturesStr(); - - // Set attributes on functions as loaded from MIR from command line arguments. - auto setMIRFunctionAttributes = [&CPUStr, &FeaturesStr](Function &F) { -- setFunctionAttributes(CPUStr, FeaturesStr, F); -+ codegen::setFunctionAttributes(CPUStr, FeaturesStr, F); - }; - -- bool SkipModule = MCPU == "help" || -+ auto MAttrs = codegen::getMAttrs(); -+ bool SkipModule = codegen::getMCPU() == "help" || - (!MAttrs.empty() && MAttrs.front() == "help"); - - // If user just wants to list available options, skip module loading -@@ -433,8 +437,8 @@ static int compileModule(char **argv, LLVMContext &Context) { - - // Get the target specific parser. - std::string Error; -- const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple, -- Error); -+ const Target *TheTarget = -+ TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error); - if (!TheTarget) { - WithColor::error(errs(), argv[0]) << Error; - return 1; -@@ -452,7 +456,7 @@ static int compileModule(char **argv, LLVMContext &Context) { - case '3': OLvl = CodeGenOpt::Aggressive; break; - } - -- TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - Options.DisableIntegratedAS = NoIntegratedAssembler; - Options.MCOptions.ShowMCEncoding = ShowMCEncoding; - Options.MCOptions.MCUseDwarfDirectory = EnableDwarfDirectory; -@@ -462,8 +466,8 @@ static int compileModule(char **argv, LLVMContext &Context) { - Options.MCOptions.SplitDwarfFile = SplitDwarfFile; - - std::unique_ptr Target(TheTarget->createTargetMachine( -- TheTriple.getTriple(), CPUStr, FeaturesStr, Options, getRelocModel(), -- getCodeModel(), OLvl)); -+ TheTriple.getTriple(), CPUStr, FeaturesStr, Options, codegen::getExplicitRelocModel(), -+ codegen::getExplicitCodeModel(), OLvl)); - - assert(Target && "Could not allocate target machine!"); - -@@ -474,8 +478,8 @@ static int compileModule(char **argv, LLVMContext &Context) { - return 0; - - assert(M && "Should have exited if we didn't have a module!"); -- if (FloatABIForCalls != FloatABI::Default) -- Options.FloatABIType = FloatABIForCalls; -+ if (codegen::getFloatABIForCalls() != FloatABI::Default) -+ Options.FloatABIType = codegen::getFloatABIForCalls(); - - // Figure out where we are going to send the output. - std::unique_ptr Out = -@@ -522,10 +526,9 @@ static int compileModule(char **argv, LLVMContext &Context) { - - // Override function attributes based on CPUStr, FeaturesStr, and command line - // flags. -- setFunctionAttributes(CPUStr, FeaturesStr, *M); -+ codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M); - -- if (RelaxAll.getNumOccurrences() > 0 && -- FileType != CGFT_ObjectFile) -+ if (mc::getExplicitRelaxAll() && codegen::getFileType() != CGFT_ObjectFile) - WithColor::warning(errs(), argv[0]) - << ": warning: ignoring -mc-relax-all because filetype != obj"; - -@@ -536,7 +539,7 @@ static int compileModule(char **argv, LLVMContext &Context) { - // so we can memcmp the contents in CompileTwice mode - SmallVector Buffer; - std::unique_ptr BOS; -- if ((FileType != CGFT_AssemblyFile && -+ if ((codegen::getFileType() != CGFT_AssemblyFile && - !Out->os().supportsSeeking()) || - CompileTwice) { - BOS = std::make_unique(Buffer); -@@ -575,9 +578,9 @@ static int compileModule(char **argv, LLVMContext &Context) { - TPC.setInitialized(); - PM.add(createPrintMIRPass(*OS)); - PM.add(createFreeMachineFunctionPass()); -- } else if (Target->addPassesToEmitFile(PM, *OS, -- DwoOut ? &DwoOut->os() : nullptr, -- FileType, NoVerify, MMIWP)) { -+ } else if (Target->addPassesToEmitFile( -+ PM, *OS, DwoOut ? &DwoOut->os() : nullptr, -+ codegen::getFileType(), NoVerify, MMIWP)) { - WithColor::warning(errs(), argv[0]) - << "target does not support generation of this" - << " file type!\n"; -diff --git a/llvm/tools/lli/CMakeLists.txt b/llvm/tools/lli/CMakeLists.txt -index db163ad131e8..bc6ef213b8fd 100644 ---- a/llvm/tools/lli/CMakeLists.txt -+++ b/llvm/tools/lli/CMakeLists.txt -@@ -53,4 +53,5 @@ add_llvm_tool(lli - DEPENDS - intrinsics_gen - ) -+ - export_executable_symbols(lli) -diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp -index 0efd0df2c12b..b6a4e3f2833c 100644 ---- a/llvm/tools/lli/lli.cpp -+++ b/llvm/tools/lli/lli.cpp -@@ -16,7 +16,7 @@ - #include "llvm/ADT/StringExtras.h" - #include "llvm/ADT/Triple.h" - #include "llvm/Bitcode/BitcodeReader.h" --#include "llvm/CodeGen/CommandFlags.inc" -+#include "llvm/CodeGen/CommandFlags.h" - #include "llvm/CodeGen/LinkAllCodegenComponents.h" - #include "llvm/Config/llvm-config.h" - #include "llvm/ExecutionEngine/GenericValue.h" -@@ -67,6 +67,8 @@ - - using namespace llvm; - -+static codegen::RegisterCodeGenFlags CGF; -+ - #define DEBUG_TYPE "lli" - - namespace { -@@ -410,13 +412,13 @@ int main(int argc, char **argv, char * const *envp) { - - std::string ErrorMsg; - EngineBuilder builder(std::move(Owner)); -- builder.setMArch(MArch); -- builder.setMCPU(getCPUStr()); -- builder.setMAttrs(getFeatureList()); -- if (RelocModel.getNumOccurrences()) -- builder.setRelocationModel(RelocModel); -- if (CMModel.getNumOccurrences()) -- builder.setCodeModel(CMModel); -+ builder.setMArch(codegen::getMArch()); -+ builder.setMCPU(codegen::getCPUStr()); -+ builder.setMAttrs(codegen::getFeatureList()); -+ if (auto RM = codegen::getExplicitRelocModel()) -+ builder.setRelocationModel(RM.getValue()); -+ if (auto CM = codegen::getExplicitCodeModel()) -+ builder.setCodeModel(CM.getValue()); - builder.setErrorStr(&ErrorMsg); - builder.setEngineKind(ForceInterpreter - ? EngineKind::Interpreter -@@ -448,9 +450,9 @@ int main(int argc, char **argv, char * const *envp) { - - builder.setOptLevel(getOptLevel()); - -- TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -- if (FloatABIForCalls != FloatABI::Default) -- Options.FloatABIType = FloatABIForCalls; -+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); -+ if (codegen::getFloatABIForCalls() != FloatABI::Default) -+ Options.FloatABIType = codegen::getFloatABIForCalls(); - - builder.setTargetOptions(Options); - -@@ -762,18 +764,15 @@ int runOrcLazyJIT(const char *ProgName) { - TT.empty() ? ExitOnErr(orc::JITTargetMachineBuilder::detectHost()) - : orc::JITTargetMachineBuilder(Triple(TT))); - -- if (!MArch.empty()) -- Builder.getJITTargetMachineBuilder()->getTargetTriple().setArchName(MArch); -+ if (!codegen::getMArch().empty()) -+ Builder.getJITTargetMachineBuilder()->getTargetTriple().setArchName( -+ codegen::getMArch()); - - Builder.getJITTargetMachineBuilder() -- ->setCPU(getCPUStr()) -- .addFeatures(getFeatureList()) -- .setRelocationModel(RelocModel.getNumOccurrences() -- ? Optional(RelocModel) -- : None) -- .setCodeModel(CMModel.getNumOccurrences() -- ? Optional(CMModel) -- : None); -+ ->setCPU(codegen::getCPUStr()) -+ .addFeatures(codegen::getFeatureList()) -+ .setRelocationModel(codegen::getExplicitRelocModel()) -+ .setCodeModel(codegen::getExplicitCodeModel()); - - Builder.setLazyCompileFailureAddr( - pointerToJITTargetAddress(exitOnLazyCallThroughFailure)); -diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp -index 23513ef8fb4e..8cfd433d5da3 100644 ---- a/llvm/tools/llvm-dwp/llvm-dwp.cpp -+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp -@@ -27,7 +27,7 @@ - #include "llvm/MC/MCObjectWriter.h" - #include "llvm/MC/MCRegisterInfo.h" - #include "llvm/MC/MCStreamer.h" --#include "llvm/MC/MCTargetOptionsCommandFlags.inc" -+#include "llvm/MC/MCTargetOptionsCommandFlags.h" - #include "llvm/Object/Decompressor.h" - #include "llvm/Object/ObjectFile.h" - #include "llvm/Support/DataExtractor.h" -@@ -46,6 +46,8 @@ - using namespace llvm; - using namespace llvm::object; - -+static mc::RegisterMCTargetOptionsFlags MCTargetOptionsFlags; -+ - cl::OptionCategory DwpCategory("Specific Options"); - static cl::list InputFiles(cl::Positional, cl::ZeroOrMore, - cl::desc(""), -@@ -676,7 +678,7 @@ int main(int argc, char **argv) { - if (!MRI) - return error(Twine("no register info for target ") + TripleName, Context); - -- MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); -+ MCTargetOptions MCOptions = llvm::mc::InitMCTargetOptionsFromFlags(); - std::unique_ptr MAI( - TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); - if (!MAI) -diff --git a/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp b/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp -index b71ed4a70566..627e9ab4c03f 100644 ---- a/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp -+++ b/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp -@@ -14,7 +14,7 @@ - #include "llvm/Analysis/TargetLibraryInfo.h" - #include "llvm/Bitcode/BitcodeReader.h" - #include "llvm/Bitcode/BitcodeWriter.h" --#include "llvm/CodeGen/CommandFlags.inc" -+#include "llvm/CodeGen/CommandFlags.h" - #include "llvm/FuzzMutate/FuzzerCLI.h" - #include "llvm/FuzzMutate/IRMutator.h" - #include "llvm/FuzzMutate/Operations.h" -@@ -35,6 +35,8 @@ - - using namespace llvm; - -+static codegen::RegisterCodeGenFlags CGF; -+ - static cl::opt - OptLevel("O", - cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " -@@ -133,14 +135,15 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, - // Get the target specific parser. - std::string Error; - const Target *TheTarget = -- TargetRegistry::lookupTarget(MArch, TheTriple, Error); -+ TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error); - if (!TheTarget) { - errs() << argv[0] << ": " << Error; - return 1; - } - - // Set up the pipeline like llc does. -- std::string CPUStr = getCPUStr(), FeaturesStr = getFeaturesStr(); -+ std::string CPUStr = codegen::getCPUStr(), -+ FeaturesStr = codegen::getFeaturesStr(); - - CodeGenOpt::Level OLvl = CodeGenOpt::Default; - switch (OptLevel) { -@@ -154,10 +157,10 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, - case '3': OLvl = CodeGenOpt::Aggressive; break; - } - -- TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -- TM.reset(TheTarget->createTargetMachine(TheTriple.getTriple(), CPUStr, -- FeaturesStr, Options, getRelocModel(), -- getCodeModel(), OLvl)); -+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); -+ TM.reset(TheTarget->createTargetMachine( -+ TheTriple.getTriple(), CPUStr, FeaturesStr, Options, -+ codegen::getExplicitRelocModel(), codegen::getExplicitCodeModel(), OLvl)); - assert(TM && "Could not allocate target machine!"); - - // Make sure we print the summary and the current unit when LLVM errors out. -diff --git a/llvm/tools/llvm-lto/CMakeLists.txt b/llvm/tools/llvm-lto/CMakeLists.txt -index 69868fb870c0..5128e713eecf 100644 ---- a/llvm/tools/llvm-lto/CMakeLists.txt -+++ b/llvm/tools/llvm-lto/CMakeLists.txt -@@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS - AllTargetsInfos - BitReader - BitWriter -+ CodeGen - Core - IRReader - LTO -@@ -17,7 +18,5 @@ set(LLVM_LINK_COMPONENTS - add_llvm_tool(llvm-lto - llvm-lto.cpp - -- DEPENDS -- intrinsics_gen -+ DEPENDS intrinsics_gen - ) -- -diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp -index b47e68e82850..7886426e8945 100644 ---- a/llvm/tools/llvm-lto/llvm-lto.cpp -+++ b/llvm/tools/llvm-lto/llvm-lto.cpp -@@ -21,7 +21,7 @@ - #include "llvm/ADT/Twine.h" - #include "llvm/Bitcode/BitcodeReader.h" - #include "llvm/Bitcode/BitcodeWriter.h" --#include "llvm/CodeGen/CommandFlags.inc" -+#include "llvm/CodeGen/CommandFlags.h" - #include "llvm/IR/DiagnosticInfo.h" - #include "llvm/IR/DiagnosticPrinter.h" - #include "llvm/IR/LLVMContext.h" -@@ -62,6 +62,8 @@ - - using namespace llvm; - -+static codegen::RegisterCodeGenFlags CGF; -+ - static cl::opt - OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " - "(default = '-O2')"), -@@ -521,7 +523,7 @@ public: - ThinLTOCodeGenerator ThinGenerator; - - ThinLTOProcessing(const TargetOptions &Options) { -- ThinGenerator.setCodePICModel(getRelocModel()); -+ ThinGenerator.setCodePICModel(codegen::getExplicitRelocModel()); - ThinGenerator.setTargetOptions(Options); - ThinGenerator.setCacheDir(ThinLTOCacheDir); - ThinGenerator.setCachePruningInterval(ThinLTOCachePruningInterval); -@@ -873,7 +875,7 @@ int main(int argc, char **argv) { - InitializeAllAsmParsers(); - - // set up the TargetOptions for the machine -- TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - - if (ListSymbolsOnly) { - listSymbols(Options); -@@ -929,7 +931,7 @@ int main(int argc, char **argv) { - if (UseDiagnosticHandler) - CodeGen.setDiagnosticHandler(handleDiagnostics, nullptr); - -- CodeGen.setCodePICModel(getRelocModel()); -+ CodeGen.setCodePICModel(codegen::getExplicitRelocModel()); - CodeGen.setFreestanding(EnableFreestanding); - - CodeGen.setDebugInfo(LTO_DEBUG_MODEL_DWARF); -@@ -980,22 +982,18 @@ int main(int argc, char **argv) { - CodeGen.addMustPreserveSymbol(KeptDSOSyms[i]); - - // Set cpu and attrs strings for the default target/subtarget. -- CodeGen.setCpu(MCPU.c_str()); -+ CodeGen.setCpu(codegen::getMCPU().c_str()); - - CodeGen.setOptLevel(OptLevel - '0'); - -- std::string attrs; -- for (unsigned i = 0; i < MAttrs.size(); ++i) { -- if (i > 0) -- attrs.append(","); -- attrs.append(MAttrs[i]); -- } -- -- if (!attrs.empty()) -+ auto MAttrs = codegen::getMAttrs(); -+ if (!MAttrs.empty()) { -+ std::string attrs = join(MAttrs, ","); - CodeGen.setAttr(attrs); -+ } - -- if (FileType.getNumOccurrences()) -- CodeGen.setFileType(FileType); -+ if (auto FT = codegen::getExplicitFileType()) -+ CodeGen.setFileType(FT.getValue()); - - if (!OutputFilename.empty()) { - if (!CodeGen.optimize(DisableVerify, DisableInline, DisableGVNLoadPRE, -diff --git a/llvm/tools/llvm-lto2/CMakeLists.txt b/llvm/tools/llvm-lto2/CMakeLists.txt -index fa2d8624fd94..4d3364175b04 100644 ---- a/llvm/tools/llvm-lto2/CMakeLists.txt -+++ b/llvm/tools/llvm-lto2/CMakeLists.txt -@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS - AllTargetsDescs - AllTargetsInfos - BitReader -+ CodeGen - Core - Linker - LTO -diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp -index 67a677dd45fb..142ba605aa2a 100644 ---- a/llvm/tools/llvm-lto2/llvm-lto2.cpp -+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp -@@ -16,7 +16,7 @@ - //===----------------------------------------------------------------------===// - - #include "llvm/Bitcode/BitcodeReader.h" --#include "llvm/CodeGen/CommandFlags.inc" -+#include "llvm/CodeGen/CommandFlags.h" - #include "llvm/IR/DiagnosticPrinter.h" - #include "llvm/LTO/Caching.h" - #include "llvm/LTO/LTO.h" -@@ -29,6 +29,8 @@ - using namespace llvm; - using namespace lto; - -+static codegen::RegisterCodeGenFlags CGF; -+ - static cl::opt - OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " - "(default = '-O2')"), -@@ -217,12 +219,12 @@ static int run(int argc, char **argv) { - exit(1); - }; - -- Conf.CPU = MCPU; -- Conf.Options = InitTargetOptionsFromCodeGenFlags(); -- Conf.MAttrs = MAttrs; -- if (auto RM = getRelocModel()) -- Conf.RelocModel = *RM; -- Conf.CodeModel = getCodeModel(); -+ Conf.CPU = codegen::getMCPU(); -+ Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(); -+ Conf.MAttrs = codegen::getMAttrs(); -+ if (auto RM = codegen::getExplicitRelocModel()) -+ Conf.RelocModel = RM.getValue(); -+ Conf.CodeModel = codegen::getExplicitCodeModel(); - - Conf.DebugPassManager = DebugPassManager; - -@@ -264,8 +266,8 @@ static int run(int argc, char **argv) { - return 1; - } - -- if (FileType.getNumOccurrences()) -- Conf.CGFileType = FileType; -+ if (auto FT = codegen::getExplicitFileType()) -+ Conf.CGFileType = FT.getValue(); - - Conf.OverrideTriple = OverrideTriple; - Conf.DefaultTriple = DefaultTriple; -diff --git a/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt b/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt -index fb6befd3c54a..6bbc502e2eee 100644 ---- a/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt -+++ b/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt -@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS - MCParser - Support - ) -+ - add_llvm_fuzzer(llvm-mc-assemble-fuzzer - llvm-mc-assemble-fuzzer.cpp - ) -diff --git a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp -index 6c5961f7027c..29699c634bfa 100644 ---- a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp -+++ b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp -@@ -9,7 +9,6 @@ - //===----------------------------------------------------------------------===// - - #include "llvm-c/Target.h" --#include "llvm/MC/SubtargetFeature.h" - #include "llvm/MC/MCAsmBackend.h" - #include "llvm/MC/MCAsmInfo.h" - #include "llvm/MC/MCCodeEmitter.h" -@@ -24,15 +23,16 @@ - #include "llvm/MC/MCSectionMachO.h" - #include "llvm/MC/MCStreamer.h" - #include "llvm/MC/MCSubtargetInfo.h" --#include "llvm/MC/MCTargetOptionsCommandFlags.inc" --#include "llvm/Support/MemoryBuffer.h" -+#include "llvm/MC/MCTargetOptionsCommandFlags.h" -+#include "llvm/MC/SubtargetFeature.h" - #include "llvm/Support/CommandLine.h" - #include "llvm/Support/FileUtilities.h" --#include "llvm/Support/raw_ostream.h" -+#include "llvm/Support/MemoryBuffer.h" - #include "llvm/Support/SourceMgr.h" --#include "llvm/Support/TargetSelect.h" - #include "llvm/Support/TargetRegistry.h" -+#include "llvm/Support/TargetSelect.h" - #include "llvm/Support/ToolOutputFile.h" -+#include "llvm/Support/raw_ostream.h" - - using namespace llvm; - -@@ -161,7 +161,7 @@ int AssembleOneInput(const uint8_t *Data, size_t Size) { - abort(); - } - -- MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); -+ MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); - std::unique_ptr MAI( - TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); - if (!MAI) { -diff --git a/llvm/tools/llvm-mc/CMakeLists.txt b/llvm/tools/llvm-mc/CMakeLists.txt -index 15c6dda1b258..94add70b6943 100644 ---- a/llvm/tools/llvm-mc/CMakeLists.txt -+++ b/llvm/tools/llvm-mc/CMakeLists.txt -@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS - AllTargetsDescs - AllTargetsDisassemblers - AllTargetsInfos -+ CodeGen - MC - MCParser - Support -diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp -index 6aa347d98be2..8c1b3cf2cab0 100644 ---- a/llvm/tools/llvm-mc/llvm-mc.cpp -+++ b/llvm/tools/llvm-mc/llvm-mc.cpp -@@ -25,7 +25,7 @@ - #include "llvm/MC/MCRegisterInfo.h" - #include "llvm/MC/MCStreamer.h" - #include "llvm/MC/MCSubtargetInfo.h" --#include "llvm/MC/MCTargetOptionsCommandFlags.inc" -+#include "llvm/MC/MCTargetOptionsCommandFlags.h" - #include "llvm/Support/CommandLine.h" - #include "llvm/Support/Compression.h" - #include "llvm/Support/FileUtilities.h" -@@ -41,6 +41,8 @@ - - using namespace llvm; - -+static mc::RegisterMCTargetOptionsFlags MOF; -+ - static cl::opt - InputFilename(cl::Positional, cl::desc(""), cl::init("-")); - -@@ -317,7 +319,7 @@ int main(int argc, char **argv) { - cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); - - cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); -- const MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); -+ const MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); - setDwarfDebugFlags(argc, argv); - - setDwarfDebugProducer(); -diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp -index fff5906bb59b..eca86768aa5c 100644 ---- a/llvm/tools/llvm-mca/llvm-mca.cpp -+++ b/llvm/tools/llvm-mca/llvm-mca.cpp -@@ -39,7 +39,7 @@ - #include "llvm/MC/MCObjectFileInfo.h" - #include "llvm/MC/MCRegisterInfo.h" - #include "llvm/MC/MCSubtargetInfo.h" --#include "llvm/MC/MCTargetOptionsCommandFlags.inc" -+#include "llvm/MC/MCTargetOptionsCommandFlags.h" - #include "llvm/MCA/CodeEmitter.h" - #include "llvm/MCA/Context.h" - #include "llvm/MCA/InstrBuilder.h" -@@ -62,6 +62,8 @@ - - using namespace llvm; - -+static mc::RegisterMCTargetOptionsFlags MOF; -+ - static cl::OptionCategory ToolOptions("Tool Options"); - static cl::OptionCategory ViewOptions("View Options"); - -@@ -353,7 +355,7 @@ int main(int argc, char **argv) { - std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); - assert(MRI && "Unable to create target register info!"); - -- MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); -+ MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); - std::unique_ptr MAI( - TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); - assert(MAI && "Unable to create target asm info!"); -@@ -443,7 +445,7 @@ int main(int argc, char **argv) { - TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); - - std::unique_ptr MAB(TheTarget->createMCAsmBackend( -- *STI, *MRI, InitMCTargetOptionsFromFlags())); -+ *STI, *MRI, mc::InitMCTargetOptionsFromFlags())); - - for (const std::unique_ptr &Region : Regions) { - // Skip empty code regions. -diff --git a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp -index cd6c7d380cc6..e353e333f580 100644 ---- a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp -+++ b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp -@@ -12,7 +12,7 @@ - - #include "llvm/Bitcode/BitcodeReader.h" - #include "llvm/Bitcode/BitcodeWriter.h" --#include "llvm/CodeGen/CommandFlags.inc" -+#include "llvm/CodeGen/CommandFlags.h" - #include "llvm/FuzzMutate/FuzzerCLI.h" - #include "llvm/FuzzMutate/IRMutator.h" - #include "llvm/IR/Verifier.h" -@@ -24,6 +24,8 @@ - - using namespace llvm; - -+static codegen::RegisterCodeGenFlags CGF; -+ - static cl::opt - TargetTripleStr("mtriple", cl::desc("Override target triple for module")); - -@@ -124,7 +126,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - - M->setTargetTriple(TM->getTargetTriple().normalize()); - M->setDataLayout(TM->createDataLayout()); -- setFunctionAttributes(TM->getTargetCPU(), TM->getTargetFeatureString(), *M); -+ codegen::setFunctionAttributes(TM->getTargetCPU(), -+ TM->getTargetFeatureString(), *M); - - // Create pass pipeline - // -@@ -214,16 +217,17 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize( - - std::string Error; - const Target *TheTarget = -- TargetRegistry::lookupTarget(MArch, TargetTriple, Error); -+ TargetRegistry::lookupTarget(codegen::getMArch(), TargetTriple, Error); - if (!TheTarget) { - errs() << *argv[0] << ": " << Error; - exit(1); - } - -- TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - TM.reset(TheTarget->createTargetMachine( -- TargetTriple.getTriple(), getCPUStr(), getFeaturesStr(), -- Options, getRelocModel(), getCodeModel(), CodeGenOpt::Default)); -+ TargetTriple.getTriple(), codegen::getCPUStr(), codegen::getFeaturesStr(), -+ Options, codegen::getExplicitRelocModel(), -+ codegen::getExplicitCodeModel(), CodeGenOpt::Default)); - assert(TM && "Could not allocate target machine!"); - - // Check that pass pipeline is specified and correct -diff --git a/llvm/tools/lto/CMakeLists.txt b/llvm/tools/lto/CMakeLists.txt -index b86e4abd01a7..2963f97cad88 100644 ---- a/llvm/tools/lto/CMakeLists.txt -+++ b/llvm/tools/lto/CMakeLists.txt -@@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS - AllTargetsInfos - BitReader - Core -+ CodeGen - LTO - MC - MCDisassembler -@@ -20,7 +21,8 @@ set(SOURCES - - set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/lto.exports) - --add_llvm_library(LTO SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS intrinsics_gen) -+add_llvm_library(LTO SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS -+ intrinsics_gen) - - install(FILES ${LLVM_MAIN_INCLUDE_DIR}/llvm-c/lto.h - DESTINATION include/llvm-c -diff --git a/llvm/tools/lto/lto.cpp b/llvm/tools/lto/lto.cpp -index 9933af94de1e..6d207b76685f 100644 ---- a/llvm/tools/lto/lto.cpp -+++ b/llvm/tools/lto/lto.cpp -@@ -15,7 +15,7 @@ - #include "llvm/ADT/STLExtras.h" - #include "llvm/ADT/StringExtras.h" - #include "llvm/Bitcode/BitcodeReader.h" --#include "llvm/CodeGen/CommandFlags.inc" -+#include "llvm/CodeGen/CommandFlags.h" - #include "llvm/IR/DiagnosticInfo.h" - #include "llvm/IR/DiagnosticPrinter.h" - #include "llvm/IR/LLVMContext.h" -@@ -28,6 +28,10 @@ - #include "llvm/Support/TargetSelect.h" - #include "llvm/Support/raw_ostream.h" - -+using namespace llvm; -+ -+static codegen::RegisterCodeGenFlags CGF; -+ - // extra command-line flags needed for LTOCodeGenerator - static cl::opt - OptLevel("O", -@@ -154,14 +158,9 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LTOModule, lto_module_t) - // Convert the subtarget features into a string to pass to LTOCodeGenerator. - static void lto_add_attrs(lto_code_gen_t cg) { - LTOCodeGenerator *CG = unwrap(cg); -- if (MAttrs.size()) { -- std::string attrs; -- for (unsigned i = 0; i < MAttrs.size(); ++i) { -- if (i > 0) -- attrs.append(","); -- attrs.append(MAttrs[i]); -- } -- -+ auto MAttrs = codegen::getMAttrs(); -+ if (!MAttrs.empty()) { -+ std::string attrs = join(MAttrs, ","); - CG->setAttr(attrs); - } - -@@ -219,7 +218,7 @@ lto_module_is_object_file_in_memory_for_target(const void* mem, - - lto_module_t lto_module_create(const char* path) { - lto_initialize(); -- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - ErrorOr> M = - LTOModule::createFromFile(*LTOContext, StringRef(path), Options); - if (!M) -@@ -229,7 +228,7 @@ lto_module_t lto_module_create(const char* path) { - - lto_module_t lto_module_create_from_fd(int fd, const char *path, size_t size) { - lto_initialize(); -- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - ErrorOr> M = LTOModule::createFromOpenFile( - *LTOContext, fd, StringRef(path), size, Options); - if (!M) -@@ -242,7 +241,7 @@ lto_module_t lto_module_create_from_fd_at_offset(int fd, const char *path, - size_t map_size, - off_t offset) { - lto_initialize(); -- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - ErrorOr> M = LTOModule::createFromOpenFileSlice( - *LTOContext, fd, StringRef(path), map_size, offset, Options); - if (!M) -@@ -252,7 +251,7 @@ lto_module_t lto_module_create_from_fd_at_offset(int fd, const char *path, - - lto_module_t lto_module_create_from_memory(const void* mem, size_t length) { - lto_initialize(); -- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - ErrorOr> M = - LTOModule::createFromBuffer(*LTOContext, mem, length, Options); - if (!M) -@@ -264,7 +263,7 @@ lto_module_t lto_module_create_from_memory_with_path(const void* mem, - size_t length, - const char *path) { - lto_initialize(); -- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - ErrorOr> M = LTOModule::createFromBuffer( - *LTOContext, mem, length, Options, StringRef(path)); - if (!M) -@@ -275,7 +274,7 @@ lto_module_t lto_module_create_from_memory_with_path(const void* mem, - lto_module_t lto_module_create_in_local_context(const void *mem, size_t length, - const char *path) { - lto_initialize(); -- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - - // Create a local context. Ownership will be transferred to LTOModule. - std::unique_ptr Context = std::make_unique(); -@@ -294,7 +293,7 @@ lto_module_t lto_module_create_in_codegen_context(const void *mem, - const char *path, - lto_code_gen_t cg) { - lto_initialize(); -- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - ErrorOr> M = LTOModule::createFromBuffer( - unwrap(cg)->getContext(), mem, length, Options, StringRef(path)); - return wrap(M->release()); -@@ -336,7 +335,7 @@ void lto_codegen_set_diagnostic_handler(lto_code_gen_t cg, - static lto_code_gen_t createCodeGen(bool InLocalContext) { - lto_initialize(); - -- TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - - LibLTOCodeGenerator *CodeGen = - InLocalContext ? new LibLTOCodeGenerator(std::make_unique()) -@@ -484,7 +483,7 @@ void lto_codegen_set_should_embed_uselists(lto_code_gen_t cg, - thinlto_code_gen_t thinlto_create_codegen(void) { - lto_initialize(); - ThinLTOCodeGenerator *CodeGen = new ThinLTOCodeGenerator(); -- CodeGen->setTargetOptions(InitTargetOptionsFromCodeGenFlags()); -+ CodeGen->setTargetOptions(codegen::InitTargetOptionsFromCodeGenFlags()); - CodeGen->setFreestanding(EnableFreestanding); - - if (OptLevel.getNumOccurrences()) { -diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp -index 75a6cdc3892b..8d619ef76b7a 100644 ---- a/llvm/tools/opt/opt.cpp -+++ b/llvm/tools/opt/opt.cpp -@@ -22,7 +22,7 @@ - #include "llvm/Analysis/TargetLibraryInfo.h" - #include "llvm/Analysis/TargetTransformInfo.h" - #include "llvm/Bitcode/BitcodeWriterPass.h" --#include "llvm/CodeGen/CommandFlags.inc" -+#include "llvm/CodeGen/CommandFlags.h" - #include "llvm/CodeGen/TargetPassConfig.h" - #include "llvm/Config/llvm-config.h" - #include "llvm/IR/DataLayout.h" -@@ -61,6 +61,8 @@ - using namespace llvm; - using namespace opt_tool; - -+static codegen::RegisterCodeGenFlags CFG; -+ - // The OptimizationList is automatically populated with registered Passes by the - // PassNameParser. - // -@@ -470,16 +472,17 @@ static TargetMachine* GetTargetMachine(Triple TheTriple, StringRef CPUStr, - StringRef FeaturesStr, - const TargetOptions &Options) { - std::string Error; -- const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple, -- Error); -+ const Target *TheTarget = -+ TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error); - // Some modules don't specify a triple, and this is okay. - if (!TheTarget) { - return nullptr; - } - -- return TheTarget->createTargetMachine(TheTriple.getTriple(), CPUStr, -- FeaturesStr, Options, getRelocModel(), -- getCodeModel(), GetCodeGenOptLevel()); -+ return TheTarget->createTargetMachine( -+ TheTriple.getTriple(), codegen::getCPUStr(), codegen::getFeaturesStr(), -+ Options, codegen::getExplicitRelocModel(), -+ codegen::getExplicitCodeModel(), GetCodeGenOptLevel()); - } - - #ifdef BUILD_EXAMPLES -@@ -659,11 +662,11 @@ int main(int argc, char **argv) { - Triple ModuleTriple(M->getTargetTriple()); - std::string CPUStr, FeaturesStr; - TargetMachine *Machine = nullptr; -- const TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); -+ const TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); - - if (ModuleTriple.getArch()) { -- CPUStr = getCPUStr(); -- FeaturesStr = getFeaturesStr(); -+ CPUStr = codegen::getCPUStr(); -+ FeaturesStr = codegen::getFeaturesStr(); - Machine = GetTargetMachine(ModuleTriple, CPUStr, FeaturesStr, Options); - } else if (ModuleTriple.getArchName() != "unknown" && - ModuleTriple.getArchName() != "") { -@@ -676,7 +679,7 @@ int main(int argc, char **argv) { - - // Override function attributes based on CPUStr, FeaturesStr, and command line - // flags. -- setFunctionAttributes(CPUStr, FeaturesStr, *M); -+ codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M); - - // If the output is set to be emitted to standard out, and standard out is a - // console, print out a warning message and refuse to do it. We don't -diff --git a/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp b/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp -index 472d4dd6ad1e..32d1d3c91ff2 100644 ---- a/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp -+++ b/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp -@@ -25,7 +25,7 @@ - #include "llvm/MC/MCRegisterInfo.h" - #include "llvm/MC/MCStreamer.h" - #include "llvm/MC/MCSubtargetInfo.h" --#include "llvm/MC/MCTargetOptionsCommandFlags.inc" -+#include "llvm/MC/MCTargetOptionsCommandFlags.h" - #include "llvm/PassAnalysisSupport.h" - #include "llvm/Support/TargetRegistry.h" - #include "llvm/Support/raw_ostream.h" -@@ -36,6 +36,8 @@ - using namespace llvm; - using namespace dwarf; - -+mc::RegisterMCTargetOptionsFlags MOF; -+ - namespace {} // end anonymous namespace - - //===----------------------------------------------------------------------===// -@@ -410,7 +412,7 @@ llvm::Error dwarfgen::Generator::init(Triple TheTriple, uint16_t V) { - TripleName, - inconvertibleErrorCode()); - -- MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); -+ MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); - MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); - if (!MAI) - return make_error("no asm info for target " + TripleName, --- -2.33.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-basic-block-sections-support.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-basic-block-sections-support.patch deleted file mode 100644 index f90a79ae..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-basic-block-sections-support.patch +++ /dev/null @@ -1,237 +0,0 @@ -From d51fdb9f2986747a56c593fa057d531720b39deb Mon Sep 17 00:00:00 2001 -From: Sriraman Tallam -Date: Fri, 13 Mar 2020 15:58:57 -0700 -Subject: [PATCH] Basic Block Sections Support. - -This is the first in a series of patches to enable Basic Block Sections -in LLVM. - -We introduce a new compiler option, -fbasicblock-sections=, which places every -basic block in a unique ELF text section in the object file along with a -symbol labeling the basic block. The linker can then order the basic block -sections in any arbitrary sequence which when done correctly can encapsulate -block layout, function layout and function splitting optimizations. However, -there are a couple of challenges to be addressed for this to be feasible: - -1) The compiler must not allow any implicit fall-through between any two - adjacent basic blocks as they could be reordered at link time to be - non-adjacent. In other words, the compiler must make a fall-through - between adjacent basic blocks explicit by retaining the direct jump - instruction that jumps to the next basic block. These branches can only - be removed later by the linker after the blocks have been reordered. -2) All inter-basic block branch targets would now need to be resolved by - the linker as they cannot be calculated during compile time. This is - done using static relocations which bloats the size of the object files. - Further, the compiler tries to use short branch instructions on some ISAs - for branch offsets that can be accommodated in one byte. This is not - possible with basic block sections as the offset is not determined at - compile time, and long branch instructions have to be used everywhere. -3) Each additional section bloats object file sizes by tens of bytes. The - number of basic blocks can be potentially very large compared to the - size of functions and can bloat object sizes significantly. Option - fbasicblock-sections= also takes a file path which can be used to - specify a subset of basic blocks that needs unique sections to keep - the bloats small. -4) Debug Info and CFI need special handling and will be presented as - separate patches. - -Basic Block Labels - -With -fbasicblock-sections=labels, or when a basic block is placed in a -unique section, it is labelled with a symbol. This allows easy mapping of -virtual addresses from PMU profiles back to the corresponding basic blocks. -Since the number of basic blocks is large, the labeling bloats the symbol -table sizes and the string table sizes significantly. While the binary size -does increase, it does not affect performance as the symbol table is not -loaded in memory during run-time. The string table size bloat is kept very -minimal using a unary naming scheme that uses string suffix compression. -The basic blocks for function foo are named "a.BB.foo", "aa.BB.foo", ... -This turns out to be very good for string table sizes and the bloat in the -string table size for a very large binary is ~8 %. The naming also allows -using the --symbol-ordering-file option in LLD to arbitrarily reorder the -sections. - -Differential Revision: https://reviews.llvm.org/D68063 - -Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/4dfe92e46542be46d634a7ec24da2f2f889623d0] -Signed-off-by: Anuj Mittal ---- - llvm/include/llvm/CodeGen/CommandFlags.inc | 34 ++++++++++++++++++++++ - llvm/include/llvm/Target/TargetMachine.h | 14 +++++++++ - llvm/include/llvm/Target/TargetOptions.h | 31 ++++++++++++++++++-- - 3 files changed, 76 insertions(+), 3 deletions(-) - -diff --git a/llvm/include/llvm/CodeGen/CommandFlags.inc b/llvm/include/llvm/CodeGen/CommandFlags.inc -index 8739b644873d..6475a5b19edb 100644 ---- a/llvm/include/llvm/CodeGen/CommandFlags.inc -+++ b/llvm/include/llvm/CodeGen/CommandFlags.inc -@@ -238,6 +238,12 @@ static cl::opt - cl::desc("Emit functions into separate sections"), - cl::init(false)); - -+static cl::opt -+ BBSections("basicblock-sections", -+ cl::desc("Emit basic blocks into separate sections"), -+ cl::value_desc("all | | labels | none"), -+ cl::init("none")); -+ - static cl::opt TLSSize("tls-size", - cl::desc("Bit size of immediate TLS offsets"), - cl::init(0)); -@@ -251,6 +257,11 @@ static cl::opt - cl::desc("Give unique names to every section"), - cl::init(true)); - -+static cl::opt UniqueBBSectionNames( -+ "unique-bb-section-names", -+ cl::desc("Give unique names to every basic block section"), -+ cl::init(false)); -+ - static cl::opt - EABIVersion("meabi", cl::desc("Set EABI type (default depends on triple):"), - cl::init(EABI::Default), -@@ -285,6 +296,27 @@ static cl::opt - cl::desc("Always emit a debug frame section."), - cl::init(false)); - -+static llvm::BasicBlockSection -+getBBSectionsMode(llvm::TargetOptions &Options) { -+ if (BBSections == "all") -+ return BasicBlockSection::All; -+ else if (BBSections == "labels") -+ return BasicBlockSection::Labels; -+ else if (BBSections == "none") -+ return BasicBlockSection::None; -+ else { -+ ErrorOr> MBOrErr = -+ MemoryBuffer::getFile(BBSections); -+ if (!MBOrErr) { -+ errs() << "Error loading basic block sections function list file: " -+ << MBOrErr.getError().message() << "\n"; -+ } else { -+ Options.BBSectionsFuncListBuf = std::move(*MBOrErr); -+ } -+ return BasicBlockSection::List; -+ } -+} -+ - // Common utility function tightly tied to the options listed here. Initializes - // a TargetOptions object with CodeGen flags and returns it. - static TargetOptions InitTargetOptionsFromCodeGenFlags() { -@@ -308,7 +340,9 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() { - Options.RelaxELFRelocations = RelaxELFRelocations; - Options.DataSections = DataSections; - Options.FunctionSections = FunctionSections; -+ Options.BBSections = getBBSectionsMode(Options); - Options.UniqueSectionNames = UniqueSectionNames; -+ Options.UniqueBBSectionNames = UniqueBBSectionNames; - Options.TLSSize = TLSSize; - Options.EmulatedTLS = EmulatedTLS; - Options.ExplicitEmulatedTLS = EmulatedTLS.getNumOccurrences() > 0; -diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h -index 176ae39b17a7..4a1f3377f31d 100644 ---- a/llvm/include/llvm/Target/TargetMachine.h -+++ b/llvm/include/llvm/Target/TargetMachine.h -@@ -242,6 +242,9 @@ public: - - bool getUniqueSectionNames() const { return Options.UniqueSectionNames; } - -+ /// Return true if unique basic block section names must be generated. -+ bool getUniqueBBSectionNames() const { return Options.UniqueBBSectionNames; } -+ - /// Return true if data objects should be emitted into their own section, - /// corresponds to -fdata-sections. - bool getDataSections() const { -@@ -254,6 +257,17 @@ public: - return Options.FunctionSections; - } - -+ /// If basic blocks should be emitted into their own section, -+ /// corresponding to -fbasicblock-sections. -+ llvm::BasicBlockSection getBBSectionsType() const { -+ return Options.BBSections; -+ } -+ -+ /// Get the list of functions and basic block ids that need unique sections. -+ const MemoryBuffer *getBBSectionsFuncListBuf() const { -+ return Options.BBSectionsFuncListBuf.get(); -+ } -+ - /// Get a \c TargetIRAnalysis appropriate for the target. - /// - /// This is used to construct the new pass manager's target IR analysis pass, -diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h -index 84c6ee2a6387..d27c7b0178f0 100644 ---- a/llvm/include/llvm/Target/TargetOptions.h -+++ b/llvm/include/llvm/Target/TargetOptions.h -@@ -16,8 +16,11 @@ - - #include "llvm/MC/MCTargetOptions.h" - -+#include -+ - namespace llvm { - class MachineFunction; -+ class MemoryBuffer; - class Module; - - namespace FloatABI { -@@ -63,6 +66,18 @@ namespace llvm { - }; - } - -+ enum class BasicBlockSection { -+ All, // Use Basic Block Sections for all basic blocks. A section -+ // for every basic block can significantly bloat object file sizes. -+ List, // Get list of functions & BBs from a file. Selectively enables -+ // basic block sections for a subset of basic blocks which can be -+ // used to control object size bloats from creating sections. -+ Labels, // Do not use Basic Block Sections but label basic blocks. This -+ // is useful when associating profile counts from virtual addresses -+ // to basic blocks. -+ None // Do not use Basic Block Sections. -+ }; -+ - enum class EABI { - Unknown, - Default, // Default means not specified -@@ -114,9 +129,9 @@ namespace llvm { - EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false), - DisableIntegratedAS(false), RelaxELFRelocations(false), - FunctionSections(false), DataSections(false), -- UniqueSectionNames(true), TrapUnreachable(false), -- NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false), -- ExplicitEmulatedTLS(false), EnableIPRA(false), -+ UniqueSectionNames(true), UniqueBBSectionNames(false), -+ TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0), -+ EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false), - EmitStackSizeSection(false), EnableMachineOutliner(false), - SupportsDefaultOutlining(false), EmitAddrsig(false), - EnableDebugEntryValues(false), ForceDwarfFrameSection(false) {} -@@ -224,6 +239,9 @@ namespace llvm { - - unsigned UniqueSectionNames : 1; - -+ /// Use unique names for basic block sections. -+ unsigned UniqueBBSectionNames : 1; -+ - /// Emit target-specific trap instruction for 'unreachable' IR instructions. - unsigned TrapUnreachable : 1; - -@@ -256,6 +274,13 @@ namespace llvm { - /// Emit address-significance table. - unsigned EmitAddrsig : 1; - -+ /// Emit basic blocks into separate sections. -+ BasicBlockSection BBSections = BasicBlockSection::None; -+ -+ /// Memory Buffer that contains information on sampled basic blocks and used -+ /// to selectively generate basic block sections. -+ std::shared_ptr BBSectionsFuncListBuf; -+ - /// Emit debug info about parameter's entry values. - unsigned EnableDebugEntryValues : 1; - --- -2.33.1 - diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend index 9701aca6..42ccb1dd 100644 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend @@ -1,33 +1,5 @@ FILESEXTRAPATHS:prepend:intel-x86-common := "${THISDIR}/files:" -SPIRV_SRCREV = "fe4d6b767363a1995ccbfca27f79efb10dcfe110" - -SRC_URI_LLVM10_PATCHES = " \ - file://llvm10-0001-llvm-spirv-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \ - file://llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch;patchdir=llvm/projects/llvm-spirv \ - file://llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch;patchdir=llvm/projects/llvm-spirv \ - file://BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch;patchdir=llvm \ - file://IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch;patchdir=llvm \ - file://llvm10-0001-OpenCL-3.0-support.patch \ - file://llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch \ - file://llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch \ - file://llvm10-0004-Remove-repo-name-in-LLVM-IR.patch \ - file://llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \ - file://llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \ - file://llvm10-0007-support-cl_ext_float_atomics.patch \ - file://llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch \ - file://llvm10-0009-ispc-10_0_fix_for_1788.patch \ - file://llvm10-0010-ispc-10_0_fix_for_1793.patch \ - file://llvm10-0011-ispc-10_0_fix_for_1844.patch \ - file://llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch \ - file://llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch \ - file://llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch \ - file://llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch \ - file://llvm10-basic-block-sections-support.patch \ - file://llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch \ - file://llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch \ - " - SRC_URI_LLVM12_PATCHES = " \ file://llvm12-0001-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \ file://llvm12-0002-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \ @@ -36,10 +8,4 @@ SRC_URI_LLVM12_PATCHES = " \ file://llvm12-0005-ispc-12_0_fix_for_2111.patch \ " - -SPIRV_LLVM10_SRC_URI = "git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_100;destsuffix=git/llvm/projects/llvm-spirv;name=spirv" - -SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '10.0.1', ' ${SPIRV_LLVM10_SRC_URI} ${SRC_URI_LLVM10_PATCHES} ', '', d)}" SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '12.0.0', ' ${SRC_URI_LLVM12_PATCHES} ', '', d)}" - -SRCREV_spirv = "${@bb.utils.contains_any('LLVMVERSION', [ '13.0.0', '12.0.0' ], '', '${SPIRV_SRCREV}', d)}" diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch deleted file mode 100644 index 8ffa853b..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 7fc05c52dd91902fa324a7aac9b90715cfca4717 Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Wed, 15 Apr 2020 17:55:32 +0800 -Subject: [PATCH] Building in-tree with LLVM 10.0 with the LLVM_LINK_LLVM_DYLIB - -Failed to link with the LLVMSPIRVLib library. - -Add an explicit dependency to force the correct build order and linking. - -Reference: -https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/a6d4ccf082858e63e139ca06c02a071c343d2657 - -Upstream-Status: Submitted [https://github.com/intel/opencl-clang/pull/118] - -Signed-off-by: Naveen Saini ---- - CMakeLists.txt | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 51c140d..b8b514e 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -208,7 +208,7 @@ link_directories( - - set(OPENCL_CLANG_LINK_LIBS ${CMAKE_DL_LIBS}) - --if(NOT LLVMSPIRVLib IN_LIST LLVM_AVAILABLE_LIBS) -+if(NOT LLVMSPIRVLib IN_LIST LLVM_AVAILABLE_LIBS OR (USE_PREBUILT_LLVM AND LLVM_LINK_LLVM_DYLIB)) - # SPIRV-LLVM-Translator is not included into LLVM as a component. - # So, we need to list it here explicitly as an external library - list(APPEND OPENCL_CLANG_LINK_LIBS LLVMSPIRVLib) --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-make-sure-only-static-libraries-linked-for-native-bu.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-make-sure-only-static-libraries-linked-for-native-bu.patch deleted file mode 100644 index 473f4d24..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-make-sure-only-static-libraries-linked-for-native-bu.patch +++ /dev/null @@ -1,42 +0,0 @@ -From b29e00e6fe428a031cf577dfb703cf13eff837f6 Mon Sep 17 00:00:00 2001 -From: Naveen Saini -Date: Wed, 15 Apr 2020 18:05:14 +0800 -Subject: [PATCH 2/2] make sure only static libraries linked for native build - -LINK_COMPONENTS=all isn't working for static libs for out of tree builds. Use -LLVM_AVAILABLE_LIBS instead. Reported: - -https://github.com/intel/opencl-clang/issues/114 - -Upstream-Status: Pending - -Signed-off-by: Anuj Mittal -Signed-off-by: Naveen Saini ---- - CMakeLists.txt | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 8707487..ad2dbda 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -218,7 +218,7 @@ add_subdirectory(cl_headers) - - set(LLVM_REQUIRES_EH ON) - --if(USE_PREBUILT_LLVM OR CLANG_LINK_CLANG_DYLIB) -+if(false) - list(APPEND OPENCL_CLANG_LINK_LIBS clang-cpp) - else() - list(APPEND OPENCL_CLANG_LINK_LIBS -@@ -266,6 +266,7 @@ add_llvm_library(${TARGET_NAME} SHARED - all - LINK_LIBS - ${OPENCL_CLANG_LINK_LIBS} -+ ${LLVM_AVAILABLE_LIBS} - ) - - # Configure resource file on Windows --- -2.17.1 - diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_10.0.0.bb b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_10.0.0.bb deleted file mode 100644 index e08f2278..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_10.0.0.bb +++ /dev/null @@ -1,15 +0,0 @@ -require opencl-clang.inc - -SRC_URI:append = " file://0001-don-t-redefine-LLVM_TABLEGEN_EXE.patch \ - file://0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch \ - " -SRC_URI:append:class-native = " file://0002-make-sure-only-static-libraries-linked-for-native-bu.patch" - -BRANCH = "ocl-open-100" - -SRCREV = "c8cd72e32b6abc18ce6da71c357ea45ba78b52f0" - -EXTRA_OECMAKE += "\ - -DLLVM_TABLEGEN_EXE=${STAGING_BINDIR_NATIVE}/llvm-tblgen \ - -DCMAKE_SKIP_RPATH=TRUE \ - " -- cgit v1.2.3-54-g00ecf