From bdae07eceb51a038115f45a3aee2dd27323dd7b4 Mon Sep 17 00:00:00 2001 From: Naveen Saini Date: Thu, 8 Apr 2021 11:02:31 +0800 Subject: llvm-project-source: backport OpenCL recommended patches Updating SPIRV-LLVM-Translator srcrev to latest commits for * llvm_release_100 * llvm_release_110 Backport opencl-clang recommended llvm/clang patches. llvm-10: https://github.com/intel/opencl-clang/tree/ocl-open-100/patches llvm-11: https://github.com/intel/opencl-clang/tree/ocl-open-110/patches Signed-off-by: Naveen Saini Signed-off-by: Anuj Mittal --- ...info-of-work-item-builtin-translation-745.patch | 119 + ...-Memory-leak-fix-for-Managed-Static-Mutex.patch | 35 + ...2-Add-cl_khr_extended_subgroup-extensions.patch | 812 ++ .../clang/files/llvm10-OpenCL-3.0-support.patch | 8259 +++++++++++++++++++ .../files/llvm10-Remove-repo-name-in-LLVM-IR.patch | 50 + .../clang/files/llvm11-OpenCL-3.0-support.patch | 8453 ++++++++++++++++++++ .../files/llvm11-Remove-repo-name-in-LLVM-IR.patch | 50 + .../clang/llvm-project-source.bbappend | 12 +- 8 files changed, 17788 insertions(+), 2 deletions(-) create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Fix-debug-info-of-work-item-builtin-translation-745.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-Add-cl_khr_extended_subgroup-extensions.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-OpenCL-3.0-support.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Remove-repo-name-in-LLVM-IR.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-OpenCL-3.0-support.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-Remove-repo-name-in-LLVM-IR.patch diff --git 
a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Fix-debug-info-of-work-item-builtin-translation-745.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Fix-debug-info-of-work-item-builtin-translation-745.patch new file mode 100644 index 00000000..923b871f --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Fix-debug-info-of-work-item-builtin-translation-745.patch @@ -0,0 +1,119 @@ +From 200c200eb19602ffd7c8f29d0b2df9df1fd311bf Mon Sep 17 00:00:00 2001 +From: Naveen Saini +Date: Wed, 7 Apr 2021 17:44:20 +0800 +Subject: [PATCH] Fix debug info of work-item builtin translation (#745) + +debug info of work-item builtins are lost in both llvm IR -> spirv and +spirv -> llvm IR translations. See #744 + +Upstream-Status: Backport [https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/c734c5c8bbd3012a09c610e4be68e90cc603c580] +Signed-off-by: Wenju He +Signed-off-by: Naveen Saini +--- + lib/SPIRV/OCL20ToSPIRV.cpp | 5 ++- + lib/SPIRV/SPIRVReader.cpp | 1 + + test/DebugInfo/builtin-get-global-id.ll | 60 +++++++++++++++++++++++++ + 3 files changed, 65 insertions(+), 1 deletion(-) + create mode 100644 test/DebugInfo/builtin-get-global-id.ll + +diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp +index 1262c48c..a742c8cf 100644 +--- a/lib/SPIRV/OCL20ToSPIRV.cpp ++++ b/lib/SPIRV/OCL20ToSPIRV.cpp +@@ -1297,11 +1297,14 @@ void OCL20ToSPIRV::transWorkItemBuiltinsToVariables() { + for (auto UI = I.user_begin(), UE = I.user_end(); UI != UE; ++UI) { + auto CI = dyn_cast(*UI); + assert(CI && "invalid instruction"); +- Value *NewValue = new LoadInst(BV, "", CI); ++ const DebugLoc &DLoc = CI->getDebugLoc(); ++ Instruction *NewValue = new LoadInst(BV, "", CI); ++ NewValue->setDebugLoc(DLoc); + LLVM_DEBUG(dbgs() << "Transform: " << *CI << " => " << *NewValue << '\n'); + if (IsVec) { + NewValue = + ExtractElementInst::Create(NewValue, CI->getArgOperand(0), "", CI); ++ NewValue->setDebugLoc(DLoc); + 
LLVM_DEBUG(dbgs() << *NewValue << '\n'); + } + NewValue->takeName(CI); +diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp +index 16a3dd38..528f6663 100644 +--- a/lib/SPIRV/SPIRVReader.cpp ++++ b/lib/SPIRV/SPIRVReader.cpp +@@ -307,6 +307,7 @@ bool SPIRVToLLVM::transOCLBuiltinFromVariable(GlobalVariable *GV, + auto Replace = [&](std::vector Arg, Instruction *I) { + auto Call = CallInst::Create(Func, Arg, "", I); + Call->takeName(I); ++ Call->setDebugLoc(I->getDebugLoc()); + setAttrByCalledFunc(Call); + SPIRVDBG(dbgs() << "[transOCLBuiltinFromVariable] " << *I << " -> " << *Call + << '\n';) +diff --git a/test/DebugInfo/builtin-get-global-id.ll b/test/DebugInfo/builtin-get-global-id.ll +new file mode 100644 +index 00000000..a4a00e63 +--- /dev/null ++++ b/test/DebugInfo/builtin-get-global-id.ll +@@ -0,0 +1,60 @@ ++; Check debug info of builtin get_global_id is preserved from LLVM IR to spirv ++; and spirv to LLVM IR translation. ++ ++; Original .cl source: ++; kernel void test() { ++; size_t gid = get_global_id(0); ++; } ++ ++; Command line: ++; ./clang -cc1 1.cl -triple spir64 -cl-std=cl2.0 -emit-llvm -finclude-default-header -debug-info-kind=line-tables-only -O0 ++ ++; RUN: llvm-as %s -o %t.bc ++; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s --check-prefix CHECK-SPIRV ++; RUN: llvm-spirv %t.bc -o %t.spv ++; RUN: llvm-spirv -r %t.spv -o - | llvm-dis -o - | FileCheck %s ++ ++target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" ++target triple = "spir64" ++ ++; CHECK-SPIRV: ExtInst {{.*}} DebugScope ++; CHECK-SPIRV-NEXT: Line {{[0-9]+}} 2 16 ++; CHECK-SPIRV-NEXT: Load {{[0-9]+}} [[LoadRes:[0-9]+]] ++; CHECK-SPIRV-NEXT: CompositeExtract {{[0-9]+}} {{[0-9]+}} [[LoadRes]] 0 ++ ++; Function Attrs: convergent noinline norecurse nounwind optnone ++define spir_kernel void @test() #0 !dbg !7 !kernel_arg_addr_space !2 !kernel_arg_access_qual !2 !kernel_arg_type !2 !kernel_arg_base_type !2 
!kernel_arg_type_qual !2 { ++entry: ++ %gid = alloca i64, align 8 ++ %call = call spir_func i64 @_Z13get_global_idj(i32 0) #2, !dbg !10 ++; CHECK: %call = call spir_func i64 @_Z13get_global_idj(i32 0) #1, !dbg [[DBG:![0-9]+]] ++ store i64 %call, i64* %gid, align 8, !dbg !11 ++ ret void, !dbg !12 ++} ++ ++; Function Attrs: convergent nounwind readnone ++declare spir_func i64 @_Z13get_global_idj(i32) #1 ++ ++attributes #0 = { convergent noinline norecurse nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #1 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #2 = { convergent nounwind readnone } ++ ++!llvm.dbg.cu = !{!0} ++!llvm.module.flags = !{!3, !4} ++!opencl.ocl.version = !{!5} ++!opencl.spir.version = !{!5} ++!llvm.ident = !{!6} ++ ++!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0 (https://github.com/llvm/llvm-project.git b5bc56da8aa23dc57db9d286b0591dbcf9b1bdd3)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None) ++!1 = !DIFile(filename: "", directory: "") ++!2 = !{} ++!3 = !{i32 2, !"Debug Info Version", i32 3} ++!4 = !{i32 1, !"wchar_size", i32 4} ++!5 = !{i32 2, i32 0} ++!6 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git 
b5bc56da8aa23dc57db9d286b0591dbcf9b1bdd3)"} ++!7 = distinct !DISubprogram(name: "test", scope: !8, file: !8, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) ++!8 = !DIFile(filename: "1.cl", directory: "") ++!9 = !DISubroutineType(types: !2) ++!10 = !DILocation(line: 2, column: 16, scope: !7) ++!11 = !DILocation(line: 2, column: 10, scope: !7) ++!12 = !DILocation(line: 3, column: 1, scope: !7) +-- +2.17.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch new file mode 100644 index 00000000..103dad5e --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch @@ -0,0 +1,35 @@ +From c86c43b70e029b102543e8a85d269cbeb5c00279 Mon Sep 17 00:00:00 2001 +From: juanrod2 <> +Date: Tue, 22 Dec 2020 08:33:08 +0800 +Subject: [PATCH] Memory leak fix for Managed Static Mutex + +Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch] + +Signed-off-by: Naveen Saini + +Cleaning a mutex inside ManagedStatic llvm class. +--- + llvm/lib/Support/ManagedStatic.cpp | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Support/ManagedStatic.cpp b/llvm/lib/Support/ManagedStatic.cpp +index 053493f72fb5..6571580ccecf 100644 +--- a/llvm/lib/Support/ManagedStatic.cpp ++++ b/llvm/lib/Support/ManagedStatic.cpp +@@ -76,8 +76,12 @@ void ManagedStaticBase::destroy() const { + + /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. 
+ void llvm::llvm_shutdown() { +- std::lock_guard Lock(*getManagedStaticMutex()); ++ getManagedStaticMutex()->lock(); + + while (StaticList) + StaticList->destroy(); ++ ++ getManagedStaticMutex()->unlock(); ++ delete ManagedStaticMutex; ++ ManagedStaticMutex = nullptr; + } +-- +2.29.2 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-Add-cl_khr_extended_subgroup-extensions.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-Add-cl_khr_extended_subgroup-extensions.patch new file mode 100644 index 00000000..cbe492c4 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-Add-cl_khr_extended_subgroup-extensions.patch @@ -0,0 +1,812 @@ +From 27d47f1a17c8921b07acc8cdc26e38cc609de4a9 Mon Sep 17 00:00:00 2001 +From: Naveen Saini +Date: Wed, 7 Apr 2021 16:38:38 +0800 +Subject: [PATCH 2/2] Add cl_khr_extended_subgroup extensions. + +Added extensions and their function declarations into +the standard header. + +Patch by Piotr Fusik! 
+ +Tags: #clang + +Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/4a4402f0d72167477a6252e4c3daf5089ebc8f9a] +Signed-off-by: Anastasia Stulova +Signed-off-by: Naveen Saini +--- + .../include/clang/Basic/OpenCLExtensions.def | 7 + + clang/lib/Headers/opencl-c.h | 668 ++++++++++++++++++ + clang/test/SemaOpenCL/extension-version.cl | 83 +++ + 3 files changed, 758 insertions(+) + +diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def +index 608f78a13eef..d1574164f9b2 100644 +--- a/clang/include/clang/Basic/OpenCLExtensions.def ++++ b/clang/include/clang/Basic/OpenCLExtensions.def +@@ -74,6 +74,13 @@ OPENCLEXT_INTERNAL(cl_khr_mipmap_image_writes, 200, ~0U) + OPENCLEXT_INTERNAL(cl_khr_srgb_image_writes, 200, ~0U) + OPENCLEXT_INTERNAL(cl_khr_subgroups, 200, ~0U) + OPENCLEXT_INTERNAL(cl_khr_terminate_context, 200, ~0U) ++OPENCLEXT_INTERNAL(cl_khr_subgroup_extended_types, 200, ~0U) ++OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_vote, 200, ~0U) ++OPENCLEXT_INTERNAL(cl_khr_subgroup_ballot, 200, ~0U) ++OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_arithmetic, 200, ~0U) ++OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle, 200, ~0U) ++OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle_relative, 200, ~0U) ++OPENCLEXT_INTERNAL(cl_khr_subgroup_clustered_reduce, 200, ~0U) + + // Clang Extensions. 
+ OPENCLEXT_INTERNAL(cl_clang_storage_class_specifiers, 100, ~0U) +diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h +index 93a946cec5b1..67d900eb1c3d 100644 +--- a/clang/lib/Headers/opencl-c.h ++++ b/clang/lib/Headers/opencl-c.h +@@ -17530,6 +17530,674 @@ double __ovld __conv sub_group_scan_inclusive_max(double x); + + #endif //cl_khr_subgroups cl_intel_subgroups + ++#if defined(cl_khr_subgroup_extended_types) ++char __ovld __conv sub_group_broadcast( char value, uint index ); ++char2 __ovld __conv sub_group_broadcast( char2 value, uint index ); ++char3 __ovld __conv sub_group_broadcast( char3 value, uint index ); ++char4 __ovld __conv sub_group_broadcast( char4 value, uint index ); ++char8 __ovld __conv sub_group_broadcast( char8 value, uint index ); ++char16 __ovld __conv sub_group_broadcast( char16 value, uint index ); ++ ++uchar __ovld __conv sub_group_broadcast( uchar value, uint index ); ++uchar2 __ovld __conv sub_group_broadcast( uchar2 value, uint index ); ++uchar3 __ovld __conv sub_group_broadcast( uchar3 value, uint index ); ++uchar4 __ovld __conv sub_group_broadcast( uchar4 value, uint index ); ++uchar8 __ovld __conv sub_group_broadcast( uchar8 value, uint index ); ++uchar16 __ovld __conv sub_group_broadcast( uchar16 value, uint index ); ++ ++short __ovld __conv sub_group_broadcast( short value, uint index ); ++short2 __ovld __conv sub_group_broadcast( short2 value, uint index ); ++short3 __ovld __conv sub_group_broadcast( short3 value, uint index ); ++short4 __ovld __conv sub_group_broadcast( short4 value, uint index ); ++short8 __ovld __conv sub_group_broadcast( short8 value, uint index ); ++short16 __ovld __conv sub_group_broadcast( short16 value, uint index ); ++ ++ushort __ovld __conv sub_group_broadcast( ushort value, uint index ); ++ushort2 __ovld __conv sub_group_broadcast( ushort2 value, uint index ); ++ushort3 __ovld __conv sub_group_broadcast( ushort3 value, uint index ); ++ushort4 __ovld __conv sub_group_broadcast( 
ushort4 value, uint index ); ++ushort8 __ovld __conv sub_group_broadcast( ushort8 value, uint index ); ++ushort16 __ovld __conv sub_group_broadcast( ushort16 value, uint index ); ++ ++// scalar int broadcast is part of cl_khr_subgroups ++int2 __ovld __conv sub_group_broadcast( int2 value, uint index ); ++int3 __ovld __conv sub_group_broadcast( int3 value, uint index ); ++int4 __ovld __conv sub_group_broadcast( int4 value, uint index ); ++int8 __ovld __conv sub_group_broadcast( int8 value, uint index ); ++int16 __ovld __conv sub_group_broadcast( int16 value, uint index ); ++ ++// scalar uint broadcast is part of cl_khr_subgroups ++uint2 __ovld __conv sub_group_broadcast( uint2 value, uint index ); ++uint3 __ovld __conv sub_group_broadcast( uint3 value, uint index ); ++uint4 __ovld __conv sub_group_broadcast( uint4 value, uint index ); ++uint8 __ovld __conv sub_group_broadcast( uint8 value, uint index ); ++uint16 __ovld __conv sub_group_broadcast( uint16 value, uint index ); ++ ++// scalar long broadcast is part of cl_khr_subgroups ++long2 __ovld __conv sub_group_broadcast( long2 value, uint index ); ++long3 __ovld __conv sub_group_broadcast( long3 value, uint index ); ++long4 __ovld __conv sub_group_broadcast( long4 value, uint index ); ++long8 __ovld __conv sub_group_broadcast( long8 value, uint index ); ++long16 __ovld __conv sub_group_broadcast( long16 value, uint index ); ++ ++// scalar ulong broadcast is part of cl_khr_subgroups ++ulong2 __ovld __conv sub_group_broadcast( ulong2 value, uint index ); ++ulong3 __ovld __conv sub_group_broadcast( ulong3 value, uint index ); ++ulong4 __ovld __conv sub_group_broadcast( ulong4 value, uint index ); ++ulong8 __ovld __conv sub_group_broadcast( ulong8 value, uint index ); ++ulong16 __ovld __conv sub_group_broadcast( ulong16 value, uint index ); ++ ++// scalar float broadcast is part of cl_khr_subgroups ++float2 __ovld __conv sub_group_broadcast( float2 value, uint index ); ++float3 __ovld __conv sub_group_broadcast( 
float3 value, uint index ); ++float4 __ovld __conv sub_group_broadcast( float4 value, uint index ); ++float8 __ovld __conv sub_group_broadcast( float8 value, uint index ); ++float16 __ovld __conv sub_group_broadcast( float16 value, uint index ); ++ ++char __ovld __conv sub_group_reduce_add( char value ); ++uchar __ovld __conv sub_group_reduce_add( uchar value ); ++short __ovld __conv sub_group_reduce_add( short value ); ++ushort __ovld __conv sub_group_reduce_add( ushort value ); ++ ++char __ovld __conv sub_group_reduce_min( char value ); ++uchar __ovld __conv sub_group_reduce_min( uchar value ); ++short __ovld __conv sub_group_reduce_min( short value ); ++ushort __ovld __conv sub_group_reduce_min( ushort value ); ++ ++char __ovld __conv sub_group_reduce_max( char value ); ++uchar __ovld __conv sub_group_reduce_max( uchar value ); ++short __ovld __conv sub_group_reduce_max( short value ); ++ushort __ovld __conv sub_group_reduce_max( ushort value ); ++ ++char __ovld __conv sub_group_scan_inclusive_add( char value ); ++uchar __ovld __conv sub_group_scan_inclusive_add( uchar value ); ++short __ovld __conv sub_group_scan_inclusive_add( short value ); ++ushort __ovld __conv sub_group_scan_inclusive_add( ushort value ); ++ ++char __ovld __conv sub_group_scan_inclusive_min( char value ); ++uchar __ovld __conv sub_group_scan_inclusive_min( uchar value ); ++short __ovld __conv sub_group_scan_inclusive_min( short value ); ++ushort __ovld __conv sub_group_scan_inclusive_min( ushort value ); ++ ++char __ovld __conv sub_group_scan_inclusive_max( char value ); ++uchar __ovld __conv sub_group_scan_inclusive_max( uchar value ); ++short __ovld __conv sub_group_scan_inclusive_max( short value ); ++ushort __ovld __conv sub_group_scan_inclusive_max( ushort value ); ++ ++char __ovld __conv sub_group_scan_exclusive_add( char value ); ++uchar __ovld __conv sub_group_scan_exclusive_add( uchar value ); ++short __ovld __conv sub_group_scan_exclusive_add( short value ); ++ushort __ovld 
__conv sub_group_scan_exclusive_add( ushort value ); ++ ++char __ovld __conv sub_group_scan_exclusive_min( char value ); ++uchar __ovld __conv sub_group_scan_exclusive_min( uchar value ); ++short __ovld __conv sub_group_scan_exclusive_min( short value ); ++ushort __ovld __conv sub_group_scan_exclusive_min( ushort value ); ++ ++char __ovld __conv sub_group_scan_exclusive_max( char value ); ++uchar __ovld __conv sub_group_scan_exclusive_max( uchar value ); ++short __ovld __conv sub_group_scan_exclusive_max( short value ); ++ushort __ovld __conv sub_group_scan_exclusive_max( ushort value ); ++ ++#if defined(cl_khr_fp16) ++// scalar half broadcast is part of cl_khr_subgroups ++half2 __ovld __conv sub_group_broadcast( half2 value, uint index ); ++half3 __ovld __conv sub_group_broadcast( half3 value, uint index ); ++half4 __ovld __conv sub_group_broadcast( half4 value, uint index ); ++half8 __ovld __conv sub_group_broadcast( half8 value, uint index ); ++half16 __ovld __conv sub_group_broadcast( half16 value, uint index ); ++#endif // cl_khr_fp16 ++ ++#if defined(cl_khr_fp64) ++// scalar double broadcast is part of cl_khr_subgroups ++double2 __ovld __conv sub_group_broadcast( double2 value, uint index ); ++double3 __ovld __conv sub_group_broadcast( double3 value, uint index ); ++double4 __ovld __conv sub_group_broadcast( double4 value, uint index ); ++double8 __ovld __conv sub_group_broadcast( double8 value, uint index ); ++double16 __ovld __conv sub_group_broadcast( double16 value, uint index ); ++#endif // cl_khr_fp64 ++ ++#endif // cl_khr_subgroup_extended_types ++ ++#if defined(cl_khr_subgroup_non_uniform_vote) ++int __ovld sub_group_elect(void); ++int __ovld sub_group_non_uniform_all( int predicate ); ++int __ovld sub_group_non_uniform_any( int predicate ); ++ ++int __ovld sub_group_non_uniform_all_equal( char value ); ++int __ovld sub_group_non_uniform_all_equal( uchar value ); ++int __ovld sub_group_non_uniform_all_equal( short value ); ++int __ovld 
sub_group_non_uniform_all_equal( ushort value ); ++int __ovld sub_group_non_uniform_all_equal( int value ); ++int __ovld sub_group_non_uniform_all_equal( uint value ); ++int __ovld sub_group_non_uniform_all_equal( long value ); ++int __ovld sub_group_non_uniform_all_equal( ulong value ); ++int __ovld sub_group_non_uniform_all_equal( float value ); ++ ++#if defined(cl_khr_fp16) ++int __ovld sub_group_non_uniform_all_equal( half value ); ++#endif // cl_khr_fp16 ++ ++#if defined(cl_khr_fp64) ++int __ovld sub_group_non_uniform_all_equal( double value ); ++#endif // cl_khr_fp64 ++ ++#endif // cl_khr_subgroup_non_uniform_vote ++ ++#if defined(cl_khr_subgroup_ballot) ++char __ovld sub_group_non_uniform_broadcast( char value, uint index ); ++char2 __ovld sub_group_non_uniform_broadcast( char2 value, uint index ); ++char3 __ovld sub_group_non_uniform_broadcast( char3 value, uint index ); ++char4 __ovld sub_group_non_uniform_broadcast( char4 value, uint index ); ++char8 __ovld sub_group_non_uniform_broadcast( char8 value, uint index ); ++char16 __ovld sub_group_non_uniform_broadcast( char16 value, uint index ); ++ ++uchar __ovld sub_group_non_uniform_broadcast( uchar value, uint index ); ++uchar2 __ovld sub_group_non_uniform_broadcast( uchar2 value, uint index ); ++uchar3 __ovld sub_group_non_uniform_broadcast( uchar3 value, uint index ); ++uchar4 __ovld sub_group_non_uniform_broadcast( uchar4 value, uint index ); ++uchar8 __ovld sub_group_non_uniform_broadcast( uchar8 value, uint index ); ++uchar16 __ovld sub_group_non_uniform_broadcast( uchar16 value, uint index ); ++ ++short __ovld sub_group_non_uniform_broadcast( short value, uint index ); ++short2 __ovld sub_group_non_uniform_broadcast( short2 value, uint index ); ++short3 __ovld sub_group_non_uniform_broadcast( short3 value, uint index ); ++short4 __ovld sub_group_non_uniform_broadcast( short4 value, uint index ); ++short8 __ovld sub_group_non_uniform_broadcast( short8 value, uint index ); ++short16 __ovld 
sub_group_non_uniform_broadcast( short16 value, uint index ); ++ ++ushort __ovld sub_group_non_uniform_broadcast( ushort value, uint index ); ++ushort2 __ovld sub_group_non_uniform_broadcast( ushort2 value, uint index ); ++ushort3 __ovld sub_group_non_uniform_broadcast( ushort3 value, uint index ); ++ushort4 __ovld sub_group_non_uniform_broadcast( ushort4 value, uint index ); ++ushort8 __ovld sub_group_non_uniform_broadcast( ushort8 value, uint index ); ++ushort16 __ovld sub_group_non_uniform_broadcast( ushort16 value, uint index ); ++ ++int __ovld sub_group_non_uniform_broadcast( int value, uint index ); ++int2 __ovld sub_group_non_uniform_broadcast( int2 value, uint index ); ++int3 __ovld sub_group_non_uniform_broadcast( int3 value, uint index ); ++int4 __ovld sub_group_non_uniform_broadcast( int4 value, uint index ); ++int8 __ovld sub_group_non_uniform_broadcast( int8 value, uint index ); ++int16 __ovld sub_group_non_uniform_broadcast( int16 value, uint index ); ++ ++uint __ovld sub_group_non_uniform_broadcast( uint value, uint index ); ++uint2 __ovld sub_group_non_uniform_broadcast( uint2 value, uint index ); ++uint3 __ovld sub_group_non_uniform_broadcast( uint3 value, uint index ); ++uint4 __ovld sub_group_non_uniform_broadcast( uint4 value, uint index ); ++uint8 __ovld sub_group_non_uniform_broadcast( uint8 value, uint index ); ++uint16 __ovld sub_group_non_uniform_broadcast( uint16 value, uint index ); ++ ++long __ovld sub_group_non_uniform_broadcast( long value, uint index ); ++long2 __ovld sub_group_non_uniform_broadcast( long2 value, uint index ); ++long3 __ovld sub_group_non_uniform_broadcast( long3 value, uint index ); ++long4 __ovld sub_group_non_uniform_broadcast( long4 value, uint index ); ++long8 __ovld sub_group_non_uniform_broadcast( long8 value, uint index ); ++long16 __ovld sub_group_non_uniform_broadcast( long16 value, uint index ); ++ ++ulong __ovld sub_group_non_uniform_broadcast( ulong value, uint index ); ++ulong2 __ovld 
sub_group_non_uniform_broadcast( ulong2 value, uint index ); ++ulong3 __ovld sub_group_non_uniform_broadcast( ulong3 value, uint index ); ++ulong4 __ovld sub_group_non_uniform_broadcast( ulong4 value, uint index ); ++ulong8 __ovld sub_group_non_uniform_broadcast( ulong8 value, uint index ); ++ulong16 __ovld sub_group_non_uniform_broadcast( ulong16 value, uint index ); ++ ++float __ovld sub_group_non_uniform_broadcast( float value, uint index ); ++float2 __ovld sub_group_non_uniform_broadcast( float2 value, uint index ); ++float3 __ovld sub_group_non_uniform_broadcast( float3 value, uint index ); ++float4 __ovld sub_group_non_uniform_broadcast( float4 value, uint index ); ++float8 __ovld sub_group_non_uniform_broadcast( float8 value, uint index ); ++float16 __ovld sub_group_non_uniform_broadcast( float16 value, uint index ); ++ ++char __ovld sub_group_broadcast_first( char value ); ++uchar __ovld sub_group_broadcast_first( uchar value ); ++short __ovld sub_group_broadcast_first( short value ); ++ushort __ovld sub_group_broadcast_first( ushort value ); ++int __ovld sub_group_broadcast_first( int value ); ++uint __ovld sub_group_broadcast_first( uint value ); ++long __ovld sub_group_broadcast_first( long value ); ++ulong __ovld sub_group_broadcast_first( ulong value ); ++float __ovld sub_group_broadcast_first( float value ); ++ ++uint4 __ovld sub_group_ballot( int predicate ); ++int __ovld __cnfn sub_group_inverse_ballot( uint4 value ); ++int __ovld __cnfn sub_group_ballot_bit_extract( uint4 value, uint index ); ++uint __ovld __cnfn sub_group_ballot_bit_count( uint4 value ); ++ ++uint __ovld sub_group_ballot_inclusive_scan( uint4 value ); ++uint __ovld sub_group_ballot_exclusive_scan( uint4 value ); ++uint __ovld sub_group_ballot_find_lsb( uint4 value ); ++uint __ovld sub_group_ballot_find_msb( uint4 value ); ++ ++uint4 __ovld __cnfn get_sub_group_eq_mask(void); ++uint4 __ovld __cnfn get_sub_group_ge_mask(void); ++uint4 __ovld __cnfn get_sub_group_gt_mask(void); 
++uint4 __ovld __cnfn get_sub_group_le_mask(void); ++uint4 __ovld __cnfn get_sub_group_lt_mask(void); ++ ++#if defined(cl_khr_fp16) ++half __ovld sub_group_non_uniform_broadcast( half value, uint index ); ++half2 __ovld sub_group_non_uniform_broadcast( half2 value, uint index ); ++half3 __ovld sub_group_non_uniform_broadcast( half3 value, uint index ); ++half4 __ovld sub_group_non_uniform_broadcast( half4 value, uint index ); ++half8 __ovld sub_group_non_uniform_broadcast( half8 value, uint index ); ++half16 __ovld sub_group_non_uniform_broadcast( half16 value, uint index ); ++ ++half __ovld sub_group_broadcast_first( half value ); ++#endif // cl_khr_fp16 ++ ++#if defined(cl_khr_fp64) ++double __ovld sub_group_non_uniform_broadcast( double value, uint index ); ++double2 __ovld sub_group_non_uniform_broadcast( double2 value, uint index ); ++double3 __ovld sub_group_non_uniform_broadcast( double3 value, uint index ); ++double4 __ovld sub_group_non_uniform_broadcast( double4 value, uint index ); ++double8 __ovld sub_group_non_uniform_broadcast( double8 value, uint index ); ++double16 __ovld sub_group_non_uniform_broadcast( double16 value, uint index ); ++ ++double __ovld sub_group_broadcast_first( double value ); ++#endif // cl_khr_fp64 ++ ++#endif // cl_khr_subgroup_ballot ++ ++#if defined(cl_khr_subgroup_non_uniform_arithmetic) ++char __ovld sub_group_non_uniform_reduce_add( char value ); ++uchar __ovld sub_group_non_uniform_reduce_add( uchar value ); ++short __ovld sub_group_non_uniform_reduce_add( short value ); ++ushort __ovld sub_group_non_uniform_reduce_add( ushort value ); ++int __ovld sub_group_non_uniform_reduce_add( int value ); ++uint __ovld sub_group_non_uniform_reduce_add( uint value ); ++long __ovld sub_group_non_uniform_reduce_add( long value ); ++ulong __ovld sub_group_non_uniform_reduce_add( ulong value ); ++float __ovld sub_group_non_uniform_reduce_add( float value ); ++ ++char __ovld sub_group_non_uniform_reduce_mul( char value ); ++uchar __ovld 
sub_group_non_uniform_reduce_mul( uchar value ); ++short __ovld sub_group_non_uniform_reduce_mul( short value ); ++ushort __ovld sub_group_non_uniform_reduce_mul( ushort value ); ++int __ovld sub_group_non_uniform_reduce_mul( int value ); ++uint __ovld sub_group_non_uniform_reduce_mul( uint value ); ++long __ovld sub_group_non_uniform_reduce_mul( long value ); ++ulong __ovld sub_group_non_uniform_reduce_mul( ulong value ); ++float __ovld sub_group_non_uniform_reduce_mul( float value ); ++ ++char __ovld sub_group_non_uniform_reduce_min( char value ); ++uchar __ovld sub_group_non_uniform_reduce_min( uchar value ); ++short __ovld sub_group_non_uniform_reduce_min( short value ); ++ushort __ovld sub_group_non_uniform_reduce_min( ushort value ); ++int __ovld sub_group_non_uniform_reduce_min( int value ); ++uint __ovld sub_group_non_uniform_reduce_min( uint value ); ++long __ovld sub_group_non_uniform_reduce_min( long value ); ++ulong __ovld sub_group_non_uniform_reduce_min( ulong value ); ++float __ovld sub_group_non_uniform_reduce_min( float value ); ++ ++char __ovld sub_group_non_uniform_reduce_max( char value ); ++uchar __ovld sub_group_non_uniform_reduce_max( uchar value ); ++short __ovld sub_group_non_uniform_reduce_max( short value ); ++ushort __ovld sub_group_non_uniform_reduce_max( ushort value ); ++int __ovld sub_group_non_uniform_reduce_max( int value ); ++uint __ovld sub_group_non_uniform_reduce_max( uint value ); ++long __ovld sub_group_non_uniform_reduce_max( long value ); ++ulong __ovld sub_group_non_uniform_reduce_max( ulong value ); ++float __ovld sub_group_non_uniform_reduce_max( float value ); ++ ++char __ovld sub_group_non_uniform_scan_inclusive_add( char value ); ++uchar __ovld sub_group_non_uniform_scan_inclusive_add( uchar value ); ++short __ovld sub_group_non_uniform_scan_inclusive_add( short value ); ++ushort __ovld sub_group_non_uniform_scan_inclusive_add( ushort value ); ++int __ovld sub_group_non_uniform_scan_inclusive_add( int value ); ++uint 
__ovld sub_group_non_uniform_scan_inclusive_add( uint value ); ++long __ovld sub_group_non_uniform_scan_inclusive_add( long value ); ++ulong __ovld sub_group_non_uniform_scan_inclusive_add( ulong value ); ++float __ovld sub_group_non_uniform_scan_inclusive_add( float value ); ++ ++char __ovld sub_group_non_uniform_scan_inclusive_mul( char value ); ++uchar __ovld sub_group_non_uniform_scan_inclusive_mul( uchar value ); ++short __ovld sub_group_non_uniform_scan_inclusive_mul( short value ); ++ushort __ovld sub_group_non_uniform_scan_inclusive_mul( ushort value ); ++int __ovld sub_group_non_uniform_scan_inclusive_mul( int value ); ++uint __ovld sub_group_non_uniform_scan_inclusive_mul( uint value ); ++long __ovld sub_group_non_uniform_scan_inclusive_mul( long value ); ++ulong __ovld sub_group_non_uniform_scan_inclusive_mul( ulong value ); ++float __ovld sub_group_non_uniform_scan_inclusive_mul( float value ); ++ ++char __ovld sub_group_non_uniform_scan_inclusive_min( char value ); ++uchar __ovld sub_group_non_uniform_scan_inclusive_min( uchar value ); ++short __ovld sub_group_non_uniform_scan_inclusive_min( short value ); ++ushort __ovld sub_group_non_uniform_scan_inclusive_min( ushort value ); ++int __ovld sub_group_non_uniform_scan_inclusive_min( int value ); ++uint __ovld sub_group_non_uniform_scan_inclusive_min( uint value ); ++long __ovld sub_group_non_uniform_scan_inclusive_min( long value ); ++ulong __ovld sub_group_non_uniform_scan_inclusive_min( ulong value ); ++float __ovld sub_group_non_uniform_scan_inclusive_min( float value ); ++ ++char __ovld sub_group_non_uniform_scan_inclusive_max( char value ); ++uchar __ovld sub_group_non_uniform_scan_inclusive_max( uchar value ); ++short __ovld sub_group_non_uniform_scan_inclusive_max( short value ); ++ushort __ovld sub_group_non_uniform_scan_inclusive_max( ushort value ); ++int __ovld sub_group_non_uniform_scan_inclusive_max( int value ); ++uint __ovld sub_group_non_uniform_scan_inclusive_max( uint value ); ++long 
__ovld sub_group_non_uniform_scan_inclusive_max( long value ); ++ulong __ovld sub_group_non_uniform_scan_inclusive_max( ulong value ); ++float __ovld sub_group_non_uniform_scan_inclusive_max( float value ); ++ ++char __ovld sub_group_non_uniform_scan_exclusive_add( char value ); ++uchar __ovld sub_group_non_uniform_scan_exclusive_add( uchar value ); ++short __ovld sub_group_non_uniform_scan_exclusive_add( short value ); ++ushort __ovld sub_group_non_uniform_scan_exclusive_add( ushort value ); ++int __ovld sub_group_non_uniform_scan_exclusive_add( int value ); ++uint __ovld sub_group_non_uniform_scan_exclusive_add( uint value ); ++long __ovld sub_group_non_uniform_scan_exclusive_add( long value ); ++ulong __ovld sub_group_non_uniform_scan_exclusive_add( ulong value ); ++float __ovld sub_group_non_uniform_scan_exclusive_add( float value ); ++ ++char __ovld sub_group_non_uniform_scan_exclusive_mul( char value ); ++uchar __ovld sub_group_non_uniform_scan_exclusive_mul( uchar value ); ++short __ovld sub_group_non_uniform_scan_exclusive_mul( short value ); ++ushort __ovld sub_group_non_uniform_scan_exclusive_mul( ushort value ); ++int __ovld sub_group_non_uniform_scan_exclusive_mul( int value ); ++uint __ovld sub_group_non_uniform_scan_exclusive_mul( uint value ); ++long __ovld sub_group_non_uniform_scan_exclusive_mul( long value ); ++ulong __ovld sub_group_non_uniform_scan_exclusive_mul( ulong value ); ++float __ovld sub_group_non_uniform_scan_exclusive_mul( float value ); ++ ++char __ovld sub_group_non_uniform_scan_exclusive_min( char value ); ++uchar __ovld sub_group_non_uniform_scan_exclusive_min( uchar value ); ++short __ovld sub_group_non_uniform_scan_exclusive_min( short value ); ++ushort __ovld sub_group_non_uniform_scan_exclusive_min( ushort value ); ++int __ovld sub_group_non_uniform_scan_exclusive_min( int value ); ++uint __ovld sub_group_non_uniform_scan_exclusive_min( uint value ); ++long __ovld sub_group_non_uniform_scan_exclusive_min( long value ); ++ulong 
__ovld sub_group_non_uniform_scan_exclusive_min( ulong value ); ++float __ovld sub_group_non_uniform_scan_exclusive_min( float value ); ++ ++char __ovld sub_group_non_uniform_scan_exclusive_max( char value ); ++uchar __ovld sub_group_non_uniform_scan_exclusive_max( uchar value ); ++short __ovld sub_group_non_uniform_scan_exclusive_max( short value ); ++ushort __ovld sub_group_non_uniform_scan_exclusive_max( ushort value ); ++int __ovld sub_group_non_uniform_scan_exclusive_max( int value ); ++uint __ovld sub_group_non_uniform_scan_exclusive_max( uint value ); ++long __ovld sub_group_non_uniform_scan_exclusive_max( long value ); ++ulong __ovld sub_group_non_uniform_scan_exclusive_max( ulong value ); ++float __ovld sub_group_non_uniform_scan_exclusive_max( float value ); ++ ++char __ovld sub_group_non_uniform_reduce_and( char value ); ++uchar __ovld sub_group_non_uniform_reduce_and( uchar value ); ++short __ovld sub_group_non_uniform_reduce_and( short value ); ++ushort __ovld sub_group_non_uniform_reduce_and( ushort value ); ++int __ovld sub_group_non_uniform_reduce_and( int value ); ++uint __ovld sub_group_non_uniform_reduce_and( uint value ); ++long __ovld sub_group_non_uniform_reduce_and( long value ); ++ulong __ovld sub_group_non_uniform_reduce_and( ulong value ); ++ ++char __ovld sub_group_non_uniform_reduce_or( char value ); ++uchar __ovld sub_group_non_uniform_reduce_or( uchar value ); ++short __ovld sub_group_non_uniform_reduce_or( short value ); ++ushort __ovld sub_group_non_uniform_reduce_or( ushort value ); ++int __ovld sub_group_non_uniform_reduce_or( int value ); ++uint __ovld sub_group_non_uniform_reduce_or( uint value ); ++long __ovld sub_group_non_uniform_reduce_or( long value ); ++ulong __ovld sub_group_non_uniform_reduce_or( ulong value ); ++ ++char __ovld sub_group_non_uniform_reduce_xor( char value ); ++uchar __ovld sub_group_non_uniform_reduce_xor( uchar value ); ++short __ovld sub_group_non_uniform_reduce_xor( short value ); ++ushort __ovld 
sub_group_non_uniform_reduce_xor( ushort value ); ++int __ovld sub_group_non_uniform_reduce_xor( int value ); ++uint __ovld sub_group_non_uniform_reduce_xor( uint value ); ++long __ovld sub_group_non_uniform_reduce_xor( long value ); ++ulong __ovld sub_group_non_uniform_reduce_xor( ulong value ); ++ ++char __ovld sub_group_non_uniform_scan_inclusive_and( char value ); ++uchar __ovld sub_group_non_uniform_scan_inclusive_and( uchar value ); ++short __ovld sub_group_non_uniform_scan_inclusive_and( short value ); ++ushort __ovld sub_group_non_uniform_scan_inclusive_and( ushort value ); ++int __ovld sub_group_non_uniform_scan_inclusive_and( int value ); ++uint __ovld sub_group_non_uniform_scan_inclusive_and( uint value ); ++long __ovld sub_group_non_uniform_scan_inclusive_and( long value ); ++ulong __ovld sub_group_non_uniform_scan_inclusive_and( ulong value ); ++ ++char __ovld sub_group_non_uniform_scan_inclusive_or( char value ); ++uchar __ovld sub_group_non_uniform_scan_inclusive_or( uchar value ); ++short __ovld sub_group_non_uniform_scan_inclusive_or( short value ); ++ushort __ovld sub_group_non_uniform_scan_inclusive_or( ushort value ); ++int __ovld sub_group_non_uniform_scan_inclusive_or( int value ); ++uint __ovld sub_group_non_uniform_scan_inclusive_or( uint value ); ++long __ovld sub_group_non_uniform_scan_inclusive_or( long value ); ++ulong __ovld sub_group_non_uniform_scan_inclusive_or( ulong value ); ++ ++char __ovld sub_group_non_uniform_scan_inclusive_xor( char value ); ++uchar __ovld sub_group_non_uniform_scan_inclusive_xor( uchar value ); ++short __ovld sub_group_non_uniform_scan_inclusive_xor( short value ); ++ushort __ovld sub_group_non_uniform_scan_inclusive_xor( ushort value ); ++int __ovld sub_group_non_uniform_scan_inclusive_xor( int value ); ++uint __ovld sub_group_non_uniform_scan_inclusive_xor( uint value ); ++long __ovld sub_group_non_uniform_scan_inclusive_xor( long value ); ++ulong __ovld sub_group_non_uniform_scan_inclusive_xor( ulong value 
); ++ ++char __ovld sub_group_non_uniform_scan_exclusive_and( char value ); ++uchar __ovld sub_group_non_uniform_scan_exclusive_and( uchar value ); ++short __ovld sub_group_non_uniform_scan_exclusive_and( short value ); ++ushort __ovld sub_group_non_uniform_scan_exclusive_and( ushort value ); ++int __ovld sub_group_non_uniform_scan_exclusive_and( int value ); ++uint __ovld sub_group_non_uniform_scan_exclusive_and( uint value ); ++long __ovld sub_group_non_uniform_scan_exclusive_and( long value ); ++ulong __ovld sub_group_non_uniform_scan_exclusive_and( ulong value ); ++ ++char __ovld sub_group_non_uniform_scan_exclusive_or( char value ); ++uchar __ovld sub_group_non_uniform_scan_exclusive_or( uchar value ); ++short __ovld sub_group_non_uniform_scan_exclusive_or( short value ); ++ushort __ovld sub_group_non_uniform_scan_exclusive_or( ushort value ); ++int __ovld sub_group_non_uniform_scan_exclusive_or( int value ); ++uint __ovld sub_group_non_uniform_scan_exclusive_or( uint value ); ++long __ovld sub_group_non_uniform_scan_exclusive_or( long value ); ++ulong __ovld sub_group_non_uniform_scan_exclusive_or( ulong value ); ++ ++char __ovld sub_group_non_uniform_scan_exclusive_xor( char value ); ++uchar __ovld sub_group_non_uniform_scan_exclusive_xor( uchar value ); ++short __ovld sub_group_non_uniform_scan_exclusive_xor( short value ); ++ushort __ovld sub_group_non_uniform_scan_exclusive_xor( ushort value ); ++int __ovld sub_group_non_uniform_scan_exclusive_xor( int value ); ++uint __ovld sub_group_non_uniform_scan_exclusive_xor( uint value ); ++long __ovld sub_group_non_uniform_scan_exclusive_xor( long value ); ++ulong __ovld sub_group_non_uniform_scan_exclusive_xor( ulong value ); ++ ++int __ovld sub_group_non_uniform_reduce_logical_and( int predicate ); ++int __ovld sub_group_non_uniform_reduce_logical_or( int predicate ); ++int __ovld sub_group_non_uniform_reduce_logical_xor( int predicate ); ++ ++int __ovld sub_group_non_uniform_scan_inclusive_logical_and( int 
predicate ); ++int __ovld sub_group_non_uniform_scan_inclusive_logical_or( int predicate ); ++int __ovld sub_group_non_uniform_scan_inclusive_logical_xor( int predicate ); ++ ++int __ovld sub_group_non_uniform_scan_exclusive_logical_and( int predicate ); ++int __ovld sub_group_non_uniform_scan_exclusive_logical_or( int predicate ); ++int __ovld sub_group_non_uniform_scan_exclusive_logical_xor( int predicate ); ++ ++#if defined(cl_khr_fp16) ++half __ovld sub_group_non_uniform_reduce_add( half value ); ++half __ovld sub_group_non_uniform_reduce_mul( half value ); ++half __ovld sub_group_non_uniform_reduce_min( half value ); ++half __ovld sub_group_non_uniform_reduce_max( half value ); ++half __ovld sub_group_non_uniform_scan_inclusive_add( half value ); ++half __ovld sub_group_non_uniform_scan_inclusive_mul( half value ); ++half __ovld sub_group_non_uniform_scan_inclusive_min( half value ); ++half __ovld sub_group_non_uniform_scan_inclusive_max( half value ); ++half __ovld sub_group_non_uniform_scan_exclusive_add( half value ); ++half __ovld sub_group_non_uniform_scan_exclusive_mul( half value ); ++half __ovld sub_group_non_uniform_scan_exclusive_min( half value ); ++half __ovld sub_group_non_uniform_scan_exclusive_max( half value ); ++#endif // cl_khr_fp16 ++ ++#if defined(cl_khr_fp64) ++double __ovld sub_group_non_uniform_reduce_add( double value ); ++double __ovld sub_group_non_uniform_reduce_mul( double value ); ++double __ovld sub_group_non_uniform_reduce_min( double value ); ++double __ovld sub_group_non_uniform_reduce_max( double value ); ++double __ovld sub_group_non_uniform_scan_inclusive_add( double value ); ++double __ovld sub_group_non_uniform_scan_inclusive_mul( double value ); ++double __ovld sub_group_non_uniform_scan_inclusive_min( double value ); ++double __ovld sub_group_non_uniform_scan_inclusive_max( double value ); ++double __ovld sub_group_non_uniform_scan_exclusive_add( double value ); ++double __ovld sub_group_non_uniform_scan_exclusive_mul( 
double value ); ++double __ovld sub_group_non_uniform_scan_exclusive_min( double value ); ++double __ovld sub_group_non_uniform_scan_exclusive_max( double value ); ++#endif // cl_khr_fp64 ++ ++#endif // cl_khr_subgroup_non_uniform_arithmetic ++ ++#if defined(cl_khr_subgroup_shuffle) ++char __ovld sub_group_shuffle( char value, uint index ); ++uchar __ovld sub_group_shuffle( uchar value, uint index ); ++short __ovld sub_group_shuffle( short value, uint index ); ++ushort __ovld sub_group_shuffle( ushort value, uint index ); ++int __ovld sub_group_shuffle( int value, uint index ); ++uint __ovld sub_group_shuffle( uint value, uint index ); ++long __ovld sub_group_shuffle( long value, uint index ); ++ulong __ovld sub_group_shuffle( ulong value, uint index ); ++float __ovld sub_group_shuffle( float value, uint index ); ++ ++char __ovld sub_group_shuffle_xor( char value, uint mask ); ++uchar __ovld sub_group_shuffle_xor( uchar value, uint mask ); ++short __ovld sub_group_shuffle_xor( short value, uint mask ); ++ushort __ovld sub_group_shuffle_xor( ushort value, uint mask ); ++int __ovld sub_group_shuffle_xor( int value, uint mask ); ++uint __ovld sub_group_shuffle_xor( uint value, uint mask ); ++long __ovld sub_group_shuffle_xor( long value, uint mask ); ++ulong __ovld sub_group_shuffle_xor( ulong value, uint mask ); ++float __ovld sub_group_shuffle_xor( float value, uint mask ); ++ ++#if defined(cl_khr_fp16) ++half __ovld sub_group_shuffle( half value, uint index ); ++half __ovld sub_group_shuffle_xor( half value, uint mask ); ++#endif // cl_khr_fp16 ++ ++#if defined(cl_khr_fp64) ++double __ovld sub_group_shuffle( double value, uint index ); ++double __ovld sub_group_shuffle_xor( double value, uint mask ); ++#endif // cl_khr_fp64 ++ ++#endif // cl_khr_subgroup_shuffle ++ ++#if defined(cl_khr_subgroup_shuffle_relative) ++char __ovld sub_group_shuffle_up( char value, uint delta ); ++uchar __ovld sub_group_shuffle_up( uchar value, uint delta ); ++short __ovld 
sub_group_shuffle_up( short value, uint delta ); ++ushort __ovld sub_group_shuffle_up( ushort value, uint delta ); ++int __ovld sub_group_shuffle_up( int value, uint delta ); ++uint __ovld sub_group_shuffle_up( uint value, uint delta ); ++long __ovld sub_group_shuffle_up( long value, uint delta ); ++ulong __ovld sub_group_shuffle_up( ulong value, uint delta ); ++float __ovld sub_group_shuffle_up( float value, uint delta ); ++ ++char __ovld sub_group_shuffle_down( char value, uint delta ); ++uchar __ovld sub_group_shuffle_down( uchar value, uint delta ); ++short __ovld sub_group_shuffle_down( short value, uint delta ); ++ushort __ovld sub_group_shuffle_down( ushort value, uint delta ); ++int __ovld sub_group_shuffle_down( int value, uint delta ); ++uint __ovld sub_group_shuffle_down( uint value, uint delta ); ++long __ovld sub_group_shuffle_down( long value, uint delta ); ++ulong __ovld sub_group_shuffle_down( ulong value, uint delta ); ++float __ovld sub_group_shuffle_down( float value, uint delta ); ++ ++#if defined(cl_khr_fp16) ++half __ovld sub_group_shuffle_up( half value, uint delta ); ++half __ovld sub_group_shuffle_down( half value, uint delta ); ++#endif // cl_khr_fp16 ++ ++#if defined(cl_khr_fp64) ++double __ovld sub_group_shuffle_up( double value, uint delta ); ++double __ovld sub_group_shuffle_down( double value, uint delta ); ++#endif // cl_khr_fp64 ++ ++#endif // cl_khr_subgroup_shuffle_relative ++ ++#if defined(cl_khr_subgroup_clustered_reduce) ++char __ovld sub_group_clustered_reduce_add( char value, uint clustersize ); ++uchar __ovld sub_group_clustered_reduce_add( uchar value, uint clustersize ); ++short __ovld sub_group_clustered_reduce_add( short value, uint clustersize ); ++ushort __ovld sub_group_clustered_reduce_add( ushort value, uint clustersize ); ++int __ovld sub_group_clustered_reduce_add( int value, uint clustersize ); ++uint __ovld sub_group_clustered_reduce_add( uint value, uint clustersize ); ++long __ovld 
sub_group_clustered_reduce_add( long value, uint clustersize ); ++ulong __ovld sub_group_clustered_reduce_add( ulong value, uint clustersize ); ++float __ovld sub_group_clustered_reduce_add( float value, uint clustersize ); ++ ++char __ovld sub_group_clustered_reduce_mul( char value, uint clustersize ); ++uchar __ovld sub_group_clustered_reduce_mul( uchar value, uint clustersize ); ++short __ovld sub_group_clustered_reduce_mul( short value, uint clustersize ); ++ushort __ovld sub_group_clustered_reduce_mul( ushort value, uint clustersize ); ++int __ovld sub_group_clustered_reduce_mul( int value, uint clustersize ); ++uint __ovld sub_group_clustered_reduce_mul( uint value, uint clustersize ); ++long __ovld sub_group_clustered_reduce_mul( long value, uint clustersize ); ++ulong __ovld sub_group_clustered_reduce_mul( ulong value, uint clustersize ); ++float __ovld sub_group_clustered_reduce_mul( float value, uint clustersize ); ++ ++char __ovld sub_group_clustered_reduce_min( char value, uint clustersize ); ++uchar __ovld sub_group_clustered_reduce_min( uchar value, uint clustersize ); ++short __ovld sub_group_clustered_reduce_min( short value, uint clustersize ); ++ushort __ovld sub_group_clustered_reduce_min( ushort value, uint clustersize ); ++int __ovld sub_group_clustered_reduce_min( int value, uint clustersize ); ++uint __ovld sub_group_clustered_reduce_min( uint value, uint clustersize ); ++long __ovld sub_group_clustered_reduce_min( long value, uint clustersize ); ++ulong __ovld sub_group_clustered_reduce_min( ulong value, uint clustersize ); ++float __ovld sub_group_clustered_reduce_min( float value, uint clustersize ); ++ ++char __ovld sub_group_clustered_reduce_max( char value, uint clustersize ); ++uchar __ovld sub_group_clustered_reduce_max( uchar value, uint clustersize ); ++short __ovld sub_group_clustered_reduce_max( short value, uint clustersize ); ++ushort __ovld sub_group_clustered_reduce_max( ushort value, uint clustersize ); ++int __ovld 
sub_group_clustered_reduce_max( int value, uint clustersize ); ++uint __ovld sub_group_clustered_reduce_max( uint value, uint clustersize ); ++long __ovld sub_group_clustered_reduce_max( long value, uint clustersize ); ++ulong __ovld sub_group_clustered_reduce_max( ulong value, uint clustersize ); ++float __ovld sub_group_clustered_reduce_max( float value, uint clustersize ); ++ ++char __ovld sub_group_clustered_reduce_and( char value, uint clustersize ); ++uchar __ovld sub_group_clustered_reduce_and( uchar value, uint clustersize ); ++short __ovld sub_group_clustered_reduce_and( short value, uint clustersize ); ++ushort __ovld sub_group_clustered_reduce_and( ushort value, uint clustersize ); ++int __ovld sub_group_clustered_reduce_and( int value, uint clustersize ); ++uint __ovld sub_group_clustered_reduce_and( uint value, uint clustersize ); ++long __ovld sub_group_clustered_reduce_and( long value, uint clustersize ); ++ulong __ovld sub_group_clustered_reduce_and( ulong value, uint clustersize ); ++ ++char __ovld sub_group_clustered_reduce_or( char value, uint clustersize ); ++uchar __ovld sub_group_clustered_reduce_or( uchar value, uint clustersize ); ++short __ovld sub_group_clustered_reduce_or( short value, uint clustersize ); ++ushort __ovld sub_group_clustered_reduce_or( ushort value, uint clustersize ); ++int __ovld sub_group_clustered_reduce_or( int value, uint clustersize ); ++uint __ovld sub_group_clustered_reduce_or( uint value, uint clustersize ); ++long __ovld sub_group_clustered_reduce_or( long value, uint clustersize ); ++ulong __ovld sub_group_clustered_reduce_or( ulong value, uint clustersize ); ++ ++char __ovld sub_group_clustered_reduce_xor( char value, uint clustersize ); ++uchar __ovld sub_group_clustered_reduce_xor( uchar value, uint clustersize ); ++short __ovld sub_group_clustered_reduce_xor( short value, uint clustersize ); ++ushort __ovld sub_group_clustered_reduce_xor( ushort value, uint clustersize ); ++int __ovld 
sub_group_clustered_reduce_xor( int value, uint clustersize ); ++uint __ovld sub_group_clustered_reduce_xor( uint value, uint clustersize ); ++long __ovld sub_group_clustered_reduce_xor( long value, uint clustersize ); ++ulong __ovld sub_group_clustered_reduce_xor( ulong value, uint clustersize ); ++ ++int __ovld sub_group_clustered_reduce_logical_and( int predicate, uint clustersize ); ++int __ovld sub_group_clustered_reduce_logical_or( int predicate, uint clustersize ); ++int __ovld sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize ); ++ ++#if defined(cl_khr_fp16) ++half __ovld sub_group_clustered_reduce_add( half value, uint clustersize ); ++half __ovld sub_group_clustered_reduce_mul( half value, uint clustersize ); ++half __ovld sub_group_clustered_reduce_min( half value, uint clustersize ); ++half __ovld sub_group_clustered_reduce_max( half value, uint clustersize ); ++#endif // cl_khr_fp16 ++ ++#if defined(cl_khr_fp64) ++double __ovld sub_group_clustered_reduce_add( double value, uint clustersize ); ++double __ovld sub_group_clustered_reduce_mul( double value, uint clustersize ); ++double __ovld sub_group_clustered_reduce_min( double value, uint clustersize ); ++double __ovld sub_group_clustered_reduce_max( double value, uint clustersize ); ++#endif // cl_khr_fp64 ++ ++#endif // cl_khr_subgroup_clustered_reduce ++ + #if defined(cl_intel_subgroups) + // Intel-Specific Sub Group Functions + float __ovld __conv intel_sub_group_shuffle( float x, uint c ); +diff --git a/clang/test/SemaOpenCL/extension-version.cl b/clang/test/SemaOpenCL/extension-version.cl +index 0e6bbb7d3bcd..86c78143a0eb 100644 +--- a/clang/test/SemaOpenCL/extension-version.cl ++++ b/clang/test/SemaOpenCL/extension-version.cl +@@ -333,3 +333,86 @@ + #endif + #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : enable + ++#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) ++#ifndef cl_khr_subgroup_extended_types ++#error "Missing 
cl_khr_subgroup_extended_types" ++#endif ++#else ++#ifdef cl_khr_subgroup_extended_types ++#error "Incorrect cl_khr_subgroup_extended_types define" ++#endif ++// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_extended_types' - ignoring}} ++#endif ++#pragma OPENCL EXTENSION cl_khr_subgroup_extended_types : enable ++ ++#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) ++#ifndef cl_khr_subgroup_non_uniform_vote ++#error "Missing cl_khr_subgroup_non_uniform_vote" ++#endif ++#else ++#ifdef cl_khr_subgroup_non_uniform_vote ++#error "Incorrect cl_khr_subgroup_non_uniform_vote define" ++#endif ++// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_vote' - ignoring}} ++#endif ++#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable ++ ++#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) ++#ifndef cl_khr_subgroup_ballot ++#error "Missing cl_khr_subgroup_ballot" ++#endif ++#else ++#ifdef cl_khr_subgroup_ballot ++#error "Incorrect cl_khr_subgroup_ballot define" ++#endif ++// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_ballot' - ignoring}} ++#endif ++#pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable ++ ++#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) ++#ifndef cl_khr_subgroup_non_uniform_arithmetic ++#error "Missing cl_khr_subgroup_non_uniform_arithmetic" ++#endif ++#else ++#ifdef cl_khr_subgroup_non_uniform_arithmetic ++#error "Incorrect cl_khr_subgroup_non_uniform_arithmetic define" ++#endif ++// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_arithmetic' - ignoring}} ++#endif ++#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable ++ ++#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) ++#ifndef cl_khr_subgroup_shuffle ++#error "Missing cl_khr_subgroup_shuffle" ++#endif ++#else ++#ifdef cl_khr_subgroup_shuffle ++#error "Incorrect cl_khr_subgroup_shuffle define" 
++#endif ++// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle' - ignoring}} ++#endif ++#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable ++ ++#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) ++#ifndef cl_khr_subgroup_shuffle_relative ++#error "Missing cl_khr_subgroup_shuffle_relative" ++#endif ++#else ++#ifdef cl_khr_subgroup_shuffle_relative ++#error "Incorrect cl_khr_subgroup_shuffle_relative define" ++#endif ++// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle_relative' - ignoring}} ++#endif ++#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable ++ ++#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) ++#ifndef cl_khr_subgroup_clustered_reduce ++#error "Missing cl_khr_subgroup_clustered_reduce" ++#endif ++#else ++#ifdef cl_khr_subgroup_clustered_reduce ++#error "Incorrect cl_khr_subgroup_clustered_reduce define" ++#endif ++// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_clustered_reduce' - ignoring}} ++#endif ++#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable +-- +2.17.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-OpenCL-3.0-support.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-OpenCL-3.0-support.patch new file mode 100644 index 00000000..53395ea0 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-OpenCL-3.0-support.patch @@ -0,0 +1,8259 @@ +From 31ec702cb365f4d02dd2146fb4329d642b8fc30b Mon Sep 17 00:00:00 2001 +From: Naveen Saini +Date: Wed, 7 Apr 2021 16:36:10 +0800 +Subject: [PATCH 1/2] OpenCL 3.0 support + +Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0001-OpenCL-3.0-support.patch] +Signed-off-by: Anton Zabaznov +Signed-off-by: Naveen Saini +--- + clang/include/clang/Basic/Builtins.def | 65 +- + clang/include/clang/Basic/Builtins.h | 
13 +- + .../clang/Basic/DiagnosticParseKinds.td | 2 + + .../clang/Basic/DiagnosticSemaKinds.td | 7 + + clang/include/clang/Basic/LangOptions.def | 2 + + clang/include/clang/Basic/LangStandards.def | 4 + + .../include/clang/Basic/OpenCLExtensions.def | 15 + + clang/include/clang/Basic/OpenCLOptions.h | 175 +- + clang/include/clang/Driver/Options.td | 2 +- + clang/include/clang/Sema/Sema.h | 9 + + clang/lib/AST/ASTContext.cpp | 3 +- + clang/lib/Basic/Builtins.cpp | 28 +- + clang/lib/Basic/TargetInfo.cpp | 11 + + clang/lib/Basic/Targets.cpp | 1 - + clang/lib/CodeGen/CodeGenFunction.cpp | 6 +- + clang/lib/Frontend/CompilerInvocation.cpp | 22 +- + clang/lib/Frontend/InitPreprocessor.cpp | 6 +- + clang/lib/Headers/opencl-c-base.h | 75 +- + clang/lib/Headers/opencl-c.h | 3228 ++++++++++++++--- + clang/lib/Parse/ParseDecl.cpp | 12 +- + clang/lib/Parse/ParsePragma.cpp | 10 +- + clang/lib/Sema/OpenCLBuiltins.td | 49 +- + clang/lib/Sema/Sema.cpp | 47 +- + clang/lib/Sema/SemaChecking.cpp | 38 +- + clang/lib/Sema/SemaDecl.cpp | 15 +- + clang/lib/Sema/SemaDeclAttr.cpp | 9 +- + clang/lib/Sema/SemaDeclCXX.cpp | 10 + + clang/lib/Sema/SemaLookup.cpp | 19 +- + clang/lib/Sema/SemaType.cpp | 14 +- + .../CodeGenOpenCL/addr-space-struct-arg.cl | 7 +- + .../address-spaces-conversions.cl | 2 + + .../CodeGenOpenCL/address-spaces-mangling.cl | 3 +- + clang/test/CodeGenOpenCL/address-spaces.cl | 4 + + .../amdgcn-automatic-variable.cl | 1 + + .../CodeGenOpenCL/amdgpu-sizeof-alignof.cl | 21 +- + .../CodeGenOpenCL/arm-integer-dot-product.cl | 1 + + .../test/CodeGenOpenCL/cl-uniform-wg-size.cl | 2 + + clang/test/CodeGenOpenCL/fpmath.cl | 2 + + .../generic-address-space-feature.cl | 28 + + .../intel-subgroups-avc-ext-types.cl | 1 + + .../kernels-have-spir-cc-by-default.cl | 3 + + clang/test/CodeGenOpenCL/logical-ops.cl | 1 + + clang/test/CodeGenOpenCL/no-half.cl | 1 + + clang/test/CodeGenOpenCL/pipe_builtin.cl | 3 + + clang/test/CodeGenOpenCL/pipe_types.cl | 1 + + 
clang/test/CodeGenOpenCL/printf.cl | 2 + + clang/test/CodeGenOpenCL/unroll-hint.cl | 1 + + clang/test/Driver/autocomplete.c | 2 + + clang/test/Driver/opencl.cl | 2 + + clang/test/Driver/unknown-std.cl | 1 + + clang/test/Frontend/stdlang.c | 1 + + clang/test/Headers/opencl-c-header.cl | 7 +- + clang/test/Index/pipe-size.cl | 7 + + clang/test/Preprocessor/predefined-macros.c | 13 + + .../Sema/feature-extensions-simult-support.cl | 75 + + clang/test/Sema/features-ignore-pragma.cl | 24 + + clang/test/Sema/opencl-features-pipes.cl | 18 + + clang/test/Sema/opencl-features.cl | 128 + + clang/test/Sema/pipe_builtins_feature.cl | 21 + + .../address-spaces-conversions-cl2.0.cl | 3 + + clang/test/SemaOpenCL/address-spaces.cl | 1 + + .../SemaOpenCL/cl20-device-side-enqueue.cl | 16 +- + .../SemaOpenCL/forget-unsupported-builtins.cl | 22 + + clang/test/SemaOpenCL/image-features.cl | 20 + + .../SemaOpenCL/invalid-pipe-builtin-cl2.0.cl | 1 + + clang/test/SemaOpenCL/storageclass-cl20.cl | 1 + + .../TableGen/ClangOpenCLBuiltinEmitter.cpp | 35 +- + 67 files changed, 3656 insertions(+), 723 deletions(-) + create mode 100644 clang/test/CodeGenOpenCL/generic-address-space-feature.cl + create mode 100644 clang/test/Sema/feature-extensions-simult-support.cl + create mode 100644 clang/test/Sema/features-ignore-pragma.cl + create mode 100644 clang/test/Sema/opencl-features-pipes.cl + create mode 100644 clang/test/Sema/opencl-features.cl + create mode 100644 clang/test/Sema/pipe_builtins_feature.cl + create mode 100644 clang/test/SemaOpenCL/forget-unsupported-builtins.cl + create mode 100644 clang/test/SemaOpenCL/image-features.cl + +diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def +index 1a6c85ce2dd3..b76e277f0337 100644 +--- a/clang/include/clang/Basic/Builtins.def ++++ b/clang/include/clang/Basic/Builtins.def +@@ -106,6 +106,10 @@ + # define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) + #endif + ++#if defined(BUILTIN) && 
!defined(OPENCLBUILTIN) ++# define OPENCLBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS) ++#endif ++ + // Standard libc/libm functions: + BUILTIN(__builtin_atan2 , "ddd" , "Fne") + BUILTIN(__builtin_atan2f, "fff" , "Fne") +@@ -1514,50 +1518,54 @@ BUILTIN(__builtin_coro_param, "bv*v*", "n") + + // OpenCL v2.0 s6.13.16, s9.17.3.5 - Pipe functions. + // We need the generic prototype, since the packet type could be anything. +-LANGBUILTIN(read_pipe, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(write_pipe, "i.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(reserve_read_pipe, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(reserve_write_pipe, "i.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(commit_write_pipe, "v.", "tn", OCLC20_LANG) +-LANGBUILTIN(commit_read_pipe, "v.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) +-LANGBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) 
+-LANGBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) +-LANGBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + + // OpenCL v2.0 s6.13.17 - Enqueue kernel functions. + // Custom builtin check allows to perform special check of passed block arguments. +-LANGBUILTIN(enqueue_kernel, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(enqueue_kernel, "i.", "tn", OCLC2P_LANG, ++ "__opencl_c_device_enqueue") ++OPENCLBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC2P_LANG, ++ "__opencl_c_device_enqueue") ++OPENCLBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", ++ OCLC2P_LANG, "__opencl_c_device_enqueue") ++OPENCLBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", ++ OCLC2P_LANG, "__opencl_c_device_enqueue") ++OPENCLBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_device_enqueue") + + // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 
+ // FIXME: Pointer parameters of OpenCL builtins should have their address space + // requirement defined. +-LANGBUILTIN(to_global, "v*v*", "tn", OCLC20_LANG) +-LANGBUILTIN(to_local, "v*v*", "tn", OCLC20_LANG) +-LANGBUILTIN(to_private, "v*v*", "tn", OCLC20_LANG) ++OPENCLBUILTIN(to_global, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") ++OPENCLBUILTIN(to_local, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") ++OPENCLBUILTIN(to_private, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") + + // OpenCL half load/store builtin +-LANGBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES) +-LANGBUILTIN(__builtin_store_halff, "vfh*", "n", ALL_OCLC_LANGUAGES) +-LANGBUILTIN(__builtin_load_half, "dhC*", "nc", ALL_OCLC_LANGUAGES) +-LANGBUILTIN(__builtin_load_halff, "fhC*", "nc", ALL_OCLC_LANGUAGES) ++OPENCLBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES, "") ++OPENCLBUILTIN(__builtin_store_halff, "vfh*", "n", ALL_OCLC_LANGUAGES, "") ++OPENCLBUILTIN(__builtin_load_half, "dhC*", "nc", ALL_OCLC_LANGUAGES, "") ++OPENCLBUILTIN(__builtin_load_halff, "fhC*", "nc", ALL_OCLC_LANGUAGES, "") + + // Builtins for os_log/os_trace + BUILTIN(__builtin_os_log_format_buffer_size, "zcC*.", "p:0:nut") +@@ -1578,3 +1586,4 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") + #undef BUILTIN + #undef LIBBUILTIN + #undef LANGBUILTIN ++#undef OPENCLBUILTIN +diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h +index e4ed482d9068..713ea4bc267d 100644 +--- a/clang/include/clang/Basic/Builtins.h ++++ b/clang/include/clang/Basic/Builtins.h +@@ -33,13 +33,13 @@ enum LanguageID { + CXX_LANG = 0x4, // builtin for cplusplus only. + OBJC_LANG = 0x8, // builtin for objective-c and objective-c++ + MS_LANG = 0x10, // builtin requires MS mode. +- OCLC20_LANG = 0x20, // builtin for OpenCL C 2.0 only. ++ OCLC2P_LANG = 0x20, // builtin for OpenCL C 2.0+ versions. + OCLC1X_LANG = 0x40, // builtin for OpenCL C 1.x only. 
+ OMP_LANG = 0x80, // builtin requires OpenMP. + ALL_LANGUAGES = C_LANG | CXX_LANG | OBJC_LANG, // builtin for all languages. + ALL_GNU_LANGUAGES = ALL_LANGUAGES | GNU_LANG, // builtin requires GNU mode. + ALL_MS_LANGUAGES = ALL_LANGUAGES | MS_LANG, // builtin requires MS mode. +- ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC20_LANG // builtin for OCLC languages. ++ ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC2P_LANG // builtin for OCLC languages. + }; + + namespace Builtin { +@@ -228,6 +228,10 @@ public: + /// for non-builtins. + bool canBeRedeclared(unsigned ID) const; + ++ bool requiresFeatures(unsigned ID) const { ++ return requiresFeatures(getRecord(ID)); ++ } ++ + private: + const Info &getRecord(unsigned ID) const; + +@@ -235,6 +239,11 @@ private: + bool builtinIsSupported(const Builtin::Info &BuiltinInfo, + const LangOptions &LangOpts); + ++ bool OclBuiltinIsSupported(const Builtin::Info &BuiltinInfo, ++ const LangOptions &LangOpts) const; ++ ++ bool requiresFeatures(const Builtin::Info &BuiltinInfo) const; ++ + /// Helper function for isPrintfLike and isScanfLike. + bool isLike(unsigned ID, unsigned &FormatIdx, bool &HasVAListArg, + const char *Fmt) const; +diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td +index 33adf093693f..43ea63586f91 100644 +--- a/clang/include/clang/Basic/DiagnosticParseKinds.td ++++ b/clang/include/clang/Basic/DiagnosticParseKinds.td +@@ -1199,6 +1199,8 @@ def warn_pragma_unsupported_extension : Warning< + "unsupported OpenCL extension %0 - ignoring">, InGroup; + def warn_pragma_extension_is_core : Warning< + "OpenCL extension %0 is core feature or supported optional core feature - ignoring">, InGroup>, DefaultIgnore; ++def warn_opencl_pragma_feature_ignore : Warning< ++ "OpenCL feature support can't be controlled via pragma, ignoring">, InGroup; + + // OpenCL errors. 
+ def err_opencl_taking_function_address_parser : Error< +diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td +index 917377420505..91648aa27820 100644 +--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td ++++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td +@@ -9378,6 +9378,13 @@ def ext_opencl_ext_vector_type_rgba_selector: ExtWarn< + def err_openclcxx_placement_new : Error< + "use of placement new requires explicit declaration">; + ++def err_opencl_pipes_require_feat : Error< ++ "usage of OpenCL pipes requires feature support">; ++def err_opencl_memory_scope_require_feat : Error< ++ "usage of memory scope requires feature support">; ++def err_opencl_memory_ordering_require_feat : Error< ++ "usage of memory ordering requires feature support">; ++ + // MIG routine annotations. + def warn_mig_server_routine_does_not_return_kern_return_t : Warning< + "'mig_server_routine' attribute only applies to routines that return a kern_return_t">, +diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def +index 3319a3123976..406f45c0be69 100644 +--- a/clang/include/clang/Basic/LangOptions.def ++++ b/clang/include/clang/Basic/LangOptions.def +@@ -203,6 +203,8 @@ LANGOPT(OpenCL , 1, 0, "OpenCL") + LANGOPT(OpenCLVersion , 32, 0, "OpenCL C version") + LANGOPT(OpenCLCPlusPlus , 1, 0, "C++ for OpenCL") + LANGOPT(OpenCLCPlusPlusVersion , 32, 0, "C++ for OpenCL version") ++LANGOPT(OpenCLGenericKeyword , 1, 0, "OpenCL generic keyword") ++LANGOPT(OpenCLPipeKeyword , 1, 0, "OpenCL pipe keyword") + LANGOPT(NativeHalfType , 1, 0, "Native half type support") + LANGOPT(NativeHalfArgsAndReturns, 1, 0, "Native half args and returns") + LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns") +diff --git a/clang/include/clang/Basic/LangStandards.def b/clang/include/clang/Basic/LangStandards.def +index 7f1a24db7e9b..69aaba3ff5a2 100644 +--- a/clang/include/clang/Basic/LangStandards.def 
++++ b/clang/include/clang/Basic/LangStandards.def +@@ -167,6 +167,9 @@ LANGSTANDARD(opencl12, "cl1.2", + LANGSTANDARD(opencl20, "cl2.0", + OpenCL, "OpenCL 2.0", + LineComment | C99 | Digraphs | HexFloat | OpenCL) ++LANGSTANDARD(opencl30, "cl3.0", ++ OpenCL, "OpenCL 3.0", ++ LineComment | C99 | Digraphs | HexFloat | OpenCL) + LANGSTANDARD(openclcpp, "clc++", + OpenCL, "C++ for OpenCL", + LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 | +@@ -176,6 +179,7 @@ LANGSTANDARD_ALIAS_DEPR(opencl10, "CL") + LANGSTANDARD_ALIAS_DEPR(opencl11, "CL1.1") + LANGSTANDARD_ALIAS_DEPR(opencl12, "CL1.2") + LANGSTANDARD_ALIAS_DEPR(opencl20, "CL2.0") ++LANGSTANDARD_ALIAS_DEPR(opencl30, "CL3.0") + LANGSTANDARD_ALIAS_DEPR(openclcpp, "CLC++") + + // CUDA +diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def +index 517481584313..608f78a13eef 100644 +--- a/clang/include/clang/Basic/OpenCLExtensions.def ++++ b/clang/include/clang/Basic/OpenCLExtensions.def +@@ -93,6 +93,21 @@ OPENCLEXT_INTERNAL(cl_intel_subgroups, 120, ~0U) + OPENCLEXT_INTERNAL(cl_intel_subgroups_short, 120, ~0U) + OPENCLEXT_INTERNAL(cl_intel_device_side_avc_motion_estimation, 120, ~0U) + ++OPENCLEXT_INTERNAL(__opencl_c_pipes, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_generic_address_space, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_work_group_collective_functions, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_atomic_order_acq_rel, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_atomic_order_seq_cst, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_atomic_scope_device, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_atomic_scope_all_devices, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_subgroups, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_3d_image_writes, 100, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_device_enqueue, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_read_write_images, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_program_scope_global_variables, 200, ~0U) 
++OPENCLEXT_INTERNAL(__opencl_c_fp64, 120, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_int64, 100, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_images, 100, ~0U) + #undef OPENCLEXT_INTERNAL + + #ifdef OPENCLEXT +diff --git a/clang/include/clang/Basic/OpenCLOptions.h b/clang/include/clang/Basic/OpenCLOptions.h +index 15661154eab5..9a3a8e33cadd 100644 +--- a/clang/include/clang/Basic/OpenCLOptions.h ++++ b/clang/include/clang/Basic/OpenCLOptions.h +@@ -16,11 +16,16 @@ + + #include "clang/Basic/LangOptions.h" + #include "llvm/ADT/StringMap.h" ++#include "llvm/ADT/StringSwitch.h" + + namespace clang { + + /// OpenCL supported extensions and optional core features + class OpenCLOptions { ++ // OpenCL Version ++ unsigned CLVer = 120; ++ bool IsOpenCLCPlusPlus = false; ++ + struct Info { + bool Supported; // Is this option supported + bool Enabled; // Is this option enabled +@@ -31,7 +36,38 @@ class OpenCLOptions { + :Supported(S), Enabled(E), Avail(A), Core(C){} + }; + llvm::StringMap OptMap; ++ + public: ++ void setOpenCLVersion(const LangOptions &LO) { ++ IsOpenCLCPlusPlus = LO.OpenCLCPlusPlus; ++ CLVer = IsOpenCLCPlusPlus ? 200 : LO.OpenCLVersion; ++ } ++ ++ // Get extension which is semantically equivalent to a given feature ++ // if exists (e.g. 
__opencl_c_subgroups -> cl_khr_subgroups) ++ llvm::Optional getEquivalentExtension(StringRef Feature) const { ++ return llvm::StringSwitch>(Feature) ++ .Case("__opencl_c_3d_image_writes", ++ Optional("cl_khr_3d_image_writes")) ++ .Case("__opencl_c_subgroups", Optional("cl_khr_subgroups")) ++ .Case("__opencl_c_fp64", Optional("cl_khr_fp64")) ++ .Default(Optional()); ++ } ++ ++ // Same as above but for extensions ++ llvm::Optional getEquivalentFeature(StringRef Extension) const { ++ return llvm::StringSwitch>(Extension) ++ .Case("cl_khr_3d_image_writes", ++ Optional("__opencl_c_3d_image_writes")) ++ .Case("cl_khr_subgroups", Optional("__opencl_c_subgroups")) ++ .Case("cl_khr_fp64", Optional("__opencl_c_fp64")) ++ .Default(Optional()); ++ } ++ ++ bool isFeature(llvm::StringRef Ext) const { ++ return Ext.startswith("__opencl_c"); ++ } ++ + bool isKnown(llvm::StringRef Ext) const { + return OptMap.find(Ext) != OptMap.end(); + } +@@ -42,32 +78,88 @@ public: + + // Is supported as either an extension or an (optional) core feature for + // OpenCL version \p CLVer. +- bool isSupported(llvm::StringRef Ext, const LangOptions &LO) const { ++ bool isSupported(llvm::StringRef Ext) const { + // In C++ mode all extensions should work at least as in v2.0. +- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; + auto I = OptMap.find(Ext)->getValue(); + return I.Supported && I.Avail <= CLVer; + } + + // Is supported (optional) OpenCL core features for OpenCL version \p CLVer. + // For supported extension, return false. +- bool isSupportedCore(llvm::StringRef Ext, const LangOptions &LO) const { ++ bool isSupportedCore(llvm::StringRef Ext) const { + // In C++ mode all extensions should work at least as in v2.0. +- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; + auto I = OptMap.find(Ext)->getValue(); + return I.Supported && I.Avail <= CLVer && I.Core != ~0U && CLVer >= I.Core; + } + + // Is supported OpenCL extension for OpenCL version \p CLVer. 
+ // For supported (optional) core feature, return false. +- bool isSupportedExtension(llvm::StringRef Ext, const LangOptions &LO) const { ++ bool isSupportedExtension(llvm::StringRef Ext) const { + // In C++ mode all extensions should work at least as in v2.0. +- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; + auto I = OptMap.find(Ext)->getValue(); + return I.Supported && I.Avail <= CLVer && (I.Core == ~0U || CLVer < I.Core); + } + ++ // Support features whose support is directly related to the ++ // specific OpenCL version. For example, OpenCL 2.0 supports ++ // all features that are optional in 3.0 ++ void adjustFeatures() { ++ // Support int64 by default (assume compiling for FULL profile) ++ OptMap["__opencl_c_int64"].Supported = true; ++ ++ if (CLVer >= 300) { ++ // Simultaneously support extension and corresponding feature ++ for (llvm::StringRef F : ++ {"__opencl_c_subgroups", "__opencl_c_3d_image_writes", ++ "__opencl_c_fp64"}) { ++ auto Ext = getEquivalentExtension(F); ++ OptMap[*Ext].Supported = OptMap[F].Supported; ++ } ++ ++ // OpenCL C compilers that define the feature macro __opencl_c_pipes or ++ // or __opencl_c_device_enqueue must also define the ++ // feature macro __opencl_c_generic_address_space. 
++ if (OptMap["__opencl_c_pipes"].Supported || ++ OptMap["__opencl_c_device_enqueue"].Supported) ++ OptMap["__opencl_c_generic_address_space"].Supported = true; ++ ++ // OpenCL C compilers that define the feature macro ++ // __opencl_c_3d_image_writes or __opencl_c_read_write_images must also ++ // define the feature macro __opencl_c_images ++ if (OptMap["__opencl_c_3d_image_writes"].Supported || ++ OptMap["__opencl_c_read_write_images"].Supported) ++ OptMap["__opencl_c_images"].Supported = true; ++ ++ // All other features are already supported with options ++ // or in target settings ++ return; ++ } ++ ++ auto FeaturesRange = llvm::make_filter_range( ++ OptMap, [&](llvm::StringMapEntry &OptVal) { ++ auto Opt = OptVal.getKey(); ++ return isFeature(Opt); ++ }); ++ ++ for (auto &It : FeaturesRange) { ++ auto &Info = It.getValue(); ++ // For OpenCL version less then 3.0 some ++ // features should be supported simulateneously ++ // with specific extension ++ if (Optional Ext = getEquivalentExtension(It.getKey())) ++ Info.Supported = Info.Enabled = OptMap[*Ext].Supported; ++ else if (Info.Avail <= CLVer) ++ Info.Supported = Info.Enabled = true; ++ } ++ } ++ + void enable(llvm::StringRef Ext, bool V = true) { ++ // Ignore disabling extensions if corresponding features ++ // already supported for OpenCL version higher then 3.0 ++ if (CLVer >= 300) ++ if (Optional F = getEquivalentFeature(Ext)) ++ if (V != OptMap[*F].Enabled) ++ return; + OptMap[Ext].Enabled = V; + } + +@@ -96,7 +188,7 @@ public: + OptMap[Ext].Supported = V; + } + +- OpenCLOptions(){ ++ OpenCLOptions() { + #define OPENCLEXT_INTERNAL(Ext, AvailVer, CoreVer) \ + OptMap[#Ext].Avail = AvailVer; \ + OptMap[#Ext].Core = CoreVer; +@@ -104,35 +196,86 @@ public: + } + + void addSupport(const OpenCLOptions &Opts) { ++ assert(IsOpenCLCPlusPlus == Opts.IsOpenCLCPlusPlus && CLVer == Opts.CLVer); + for (auto &I:Opts.OptMap) +- if (I.second.Supported) ++ if (I.second.Supported) { + OptMap[I.getKey()].Supported = true; 
++ // All features are enabled as they are supported ++ if (isFeature(I.getKey())) ++ OptMap[I.getKey()].Enabled = true; ++ } ++ if (CLVer >= 300) { ++ // Enabling extensions with respect to features ++ for (llvm::StringRef Ext : ++ {"cl_khr_3d_image_writes", "cl_khr_subgroups", "cl_khr_fp64"}) { ++ auto Feature = getEquivalentFeature(Ext); ++ enable(Ext, OptMap[*Feature].Enabled); ++ } ++ } + } + + void copy(const OpenCLOptions &Opts) { ++ CLVer = Opts.CLVer; ++ IsOpenCLCPlusPlus = Opts.IsOpenCLCPlusPlus; + OptMap = Opts.OptMap; + } + + // Turn on or off support of all options. + void supportAll(bool On = true) { +- for (llvm::StringMap::iterator I = OptMap.begin(), +- E = OptMap.end(); I != E; ++I) +- I->second.Supported = On; ++ for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); ++ I != E; ++I) ++ if (!isFeature(I->getKey())) ++ I->second.Supported = On; + } + + void disableAll() { +- for (llvm::StringMap::iterator I = OptMap.begin(), +- E = OptMap.end(); I != E; ++I) +- I->second.Enabled = false; ++ for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); ++ I != E; ++I) { ++ auto Ext = I->getKey(); ++ if (!isFeature(Ext)) ++ enable(Ext, false); ++ } + } + +- void enableSupportedCore(LangOptions LO) { ++ void enableSupportedCore() { + for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); + I != E; ++I) +- if (isSupportedCore(I->getKey(), LO)) ++ if (isSupportedCore(I->getKey())) + I->second.Enabled = true; + } + ++ // This enum specifies how OpenCL versions map into values ++ // for encoding. 
This is used when generating built-ins ++ // from tablegen ++ enum OpenCLVersionsEncodings : unsigned short { ++ OPENCL_C_100_CODE = 0x1, ++ OPENCL_C_110_CODE = 0x2, ++ OPENCL_C_120_CODE = 0x4, ++ OPENCL_C_200_CODE = 0x8, ++ OPENCL_C_300_CODE = 0x10, ++ OPENCL_C_ALL_CODE = 0x1f ++ }; ++ ++ // Encode version into single integer ++ static unsigned short EncodeVersion(unsigned OpenCLVersion) { ++ switch (OpenCLVersion) { ++ default: ++ llvm_unreachable("Unknown OpenCL version"); ++ case 0: ++ return OpenCLVersionsEncodings::OPENCL_C_ALL_CODE; ++ case 100: ++ return OpenCLVersionsEncodings::OPENCL_C_100_CODE; ++ case 110: ++ return OpenCLVersionsEncodings::OPENCL_C_110_CODE; ++ case 120: ++ return OpenCLVersionsEncodings::OPENCL_C_120_CODE; ++ case 200: ++ return OpenCLVersionsEncodings::OPENCL_C_200_CODE; ++ case 300: ++ return OpenCLVersionsEncodings::OPENCL_C_300_CODE; ++ } ++ } ++ + friend class ASTWriter; + friend class ASTReader; + }; +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index 391c895a453b..e03c22c749ad 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -522,7 +522,7 @@ def cl_mad_enable : Flag<["-"], "cl-mad-enable">, Group, Flags<[CC + def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group, Flags<[CC1Option]>, + HelpText<"OpenCL only. Allow use of less precise no signed zeros computations in the generated binary.">; + def cl_std_EQ : Joined<["-"], "cl-std=">, Group, Flags<[CC1Option]>, +- HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,clc++,CLC++">; ++ HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0,clc++,CLC++">; + def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group, Flags<[CC1Option]>, + HelpText<"OpenCL only. 
Allow denormals to be flushed to zero.">; + def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group, Flags<[CC1Option]>, +diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h +index 842e49602274..d8ac91bc8a31 100644 +--- a/clang/include/clang/Sema/Sema.h ++++ b/clang/include/clang/Sema/Sema.h +@@ -9632,6 +9632,10 @@ public: + /// \return true if type is disabled. + bool checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E); + ++ bool checkOpenCLSubgroupExtForCallExpr(CallExpr *Call); ++ ++ bool isSupportedOpenCLOMemoryOrdering(int64_t Ordering) const; ++ + //===--------------------------------------------------------------------===// + // OpenMP directives and clauses. + // +@@ -11102,6 +11106,11 @@ public: + /// that the user intended an assignment used as condition. + void DiagnoseEqualityWithExtraParens(ParenExpr *ParenE); + ++ template ++ void DiagnoseOpenCLRequiresOption(llvm::StringRef OpenCLOptName, ++ DiagLocT DiagLoc, DiagInfoT DiagInfo, ++ unsigned Selector, SourceRange SrcRange); ++ + /// CheckCXXBooleanCondition - Returns true if conversion to bool is invalid. 
+ ExprResult CheckCXXBooleanCondition(Expr *CondExpr, bool IsConstexpr = false); + +diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp +index 1be72efe4de8..2a81fbcde79d 100644 +--- a/clang/lib/AST/ASTContext.cpp ++++ b/clang/lib/AST/ASTContext.cpp +@@ -1490,7 +1490,8 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, + ObjCSuperType = QualType(); + + // void * type +- if (LangOpts.OpenCLVersion >= 200) { ++ if (Target.getSupportedOpenCLOpts().isEnabled( ++ "__opencl_c_generic_address_space")) { + auto Q = VoidTy.getQualifiers(); + Q.setAddressSpace(LangAS::opencl_generic); + VoidPtrTy = getPointerType(getCanonicalType( +diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp +index 0cd89df41b67..7a3067345098 100644 +--- a/clang/lib/Basic/Builtins.cpp ++++ b/clang/lib/Basic/Builtins.cpp +@@ -23,6 +23,8 @@ static const Builtin::Info BuiltinInfo[] = { + { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr }, + #define LANGBUILTIN(ID, TYPE, ATTRS, LANGS) \ + { #ID, TYPE, ATTRS, nullptr, LANGS, nullptr }, ++#define OPENCLBUILTIN(ID, TYPE, ATTRS, LANGS, FEATURE) \ ++ {#ID, TYPE, ATTRS, nullptr, LANGS, FEATURE}, + #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, LANGS) \ + { #ID, TYPE, ATTRS, HEADER, LANGS, nullptr }, + #include "clang/Basic/Builtins.def" +@@ -69,16 +71,17 @@ bool Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo, + bool ObjCUnsupported = !LangOpts.ObjC && BuiltinInfo.Langs == OBJC_LANG; + bool OclC1Unsupported = (LangOpts.OpenCLVersion / 100) != 1 && + (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES ) == OCLC1X_LANG; +- bool OclC2Unsupported = +- (LangOpts.OpenCLVersion != 200 && !LangOpts.OpenCLCPlusPlus) && +- (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC20_LANG; ++ bool OclC2PUnsupported = ++ (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC2P_LANG && ++ ((LangOpts.OpenCLVersion < 200 && !LangOpts.OpenCLCPlusPlus) || ++ !OclBuiltinIsSupported(BuiltinInfo, LangOpts)); + bool OclCUnsupported = 
!LangOpts.OpenCL && + (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES); + bool OpenMPUnsupported = !LangOpts.OpenMP && BuiltinInfo.Langs == OMP_LANG; + bool CPlusPlusUnsupported = + !LangOpts.CPlusPlus && BuiltinInfo.Langs == CXX_LANG; + return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported && +- !OclC1Unsupported && !OclC2Unsupported && !OpenMPUnsupported && ++ !OclC1Unsupported && !OclC2PUnsupported && !OpenMPUnsupported && + !GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported && + !CPlusPlusUnsupported; + } +@@ -191,3 +194,20 @@ bool Builtin::Context::canBeRedeclared(unsigned ID) const { + (!hasReferenceArgsOrResult(ID) && + !hasCustomTypechecking(ID)); + } ++ ++bool Builtin::Context::OclBuiltinIsSupported( ++ const Builtin::Info &BuiltinInfo, const LangOptions &LangOpts) const { ++ if (!requiresFeatures(BuiltinInfo)) ++ return true; ++ ++ return llvm::StringSwitch(BuiltinInfo.Features) ++ .Case("__opencl_c_device_enqueue", LangOpts.Blocks) ++ .Case("__opencl_c_generic_address_space", LangOpts.OpenCLGenericKeyword) ++ .Case("__opencl_c_pipes", LangOpts.OpenCLPipeKeyword) ++ .Default(false); ++} ++ ++bool Builtin::Context::requiresFeatures( ++ const Builtin::Info &BuiltinInfo) const { ++ return BuiltinInfo.Features && llvm::StringRef(BuiltinInfo.Features) != ""; ++} +diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp +index 3a21a19e1f19..795311d5934e 100644 +--- a/clang/lib/Basic/TargetInfo.cpp ++++ b/clang/lib/Basic/TargetInfo.cpp +@@ -377,6 +377,17 @@ void TargetInfo::adjust(LangOptions &Opts) { + HalfFormat = &llvm::APFloat::IEEEhalf(); + FloatFormat = &llvm::APFloat::IEEEsingle(); + LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ ++ auto &SupportedOCLOpts = getTargetOpts().SupportedOpenCLOptions; ++ ++ SupportedOCLOpts.setOpenCLVersion(Opts); ++ SupportedOCLOpts.adjustFeatures(); ++ ++ if (!Opts.OpenCLCPlusPlus && Opts.OpenCLVersion >= 200) ++ Opts.Blocks = 
SupportedOCLOpts.isSupported("__opencl_c_device_enqueue"); ++ Opts.OpenCLGenericKeyword = ++ SupportedOCLOpts.isSupported("__opencl_c_generic_address_space"); ++ Opts.OpenCLPipeKeyword = SupportedOCLOpts.isSupported("__opencl_c_pipes"); + } + + if (Opts.LongDoubleSize) { +diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp +index c063f8ca4472..b394be18a234 100644 +--- a/clang/lib/Basic/Targets.cpp ++++ b/clang/lib/Basic/Targets.cpp +@@ -39,7 +39,6 @@ + #include "clang/Basic/Diagnostic.h" + #include "llvm/ADT/StringExtras.h" + #include "llvm/ADT/Triple.h" +- + using namespace clang; + + namespace clang { +diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp +index 648e6d9c214a..6d839fa61abc 100644 +--- a/clang/lib/CodeGen/CodeGenFunction.cpp ++++ b/clang/lib/CodeGen/CodeGenFunction.cpp +@@ -2303,11 +2303,11 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, + std::string MissingFeature; + if (BuiltinID) { + SmallVector ReqFeatures; +- const char *FeatureList = +- CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); + // Return if the builtin doesn't have any required features. 
+- if (!FeatureList || StringRef(FeatureList) == "") ++ if (!CGM.getContext().BuiltinInfo.requiresFeatures(BuiltinID)) + return; ++ const char *FeatureList = ++ CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); + StringRef(FeatureList).split(ReqFeatures, ','); + if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature)) + CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature) +diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp +index e98a407ac42f..18fa06bf3c6d 100644 +--- a/clang/lib/Frontend/CompilerInvocation.cpp ++++ b/clang/lib/Frontend/CompilerInvocation.cpp +@@ -2298,6 +2298,8 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, + Opts.OpenCLVersion = 120; + else if (LangStd == LangStandard::lang_opencl20) + Opts.OpenCLVersion = 200; ++ else if (LangStd == LangStandard::lang_opencl30) ++ Opts.OpenCLVersion = 300; + else if (LangStd == LangStandard::lang_openclcpp) + Opts.OpenCLCPlusPlusVersion = 100; + +@@ -2498,14 +2500,15 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, + // -cl-std only applies for OpenCL language standards. + // Override the -std option in this case. 
+ if (const Arg *A = Args.getLastArg(OPT_cl_std_EQ)) { +- LangStandard::Kind OpenCLLangStd +- = llvm::StringSwitch(A->getValue()) +- .Cases("cl", "CL", LangStandard::lang_opencl10) +- .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) +- .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) +- .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) +- .Cases("clc++", "CLC++", LangStandard::lang_openclcpp) +- .Default(LangStandard::lang_unspecified); ++ LangStandard::Kind OpenCLLangStd = ++ llvm::StringSwitch(A->getValue()) ++ .Cases("cl", "CL", LangStandard::lang_opencl10) ++ .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) ++ .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) ++ .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) ++ .Cases("cl3.0", "CL3.0", LangStandard::lang_opencl30) ++ .Cases("clc++", "CLC++", LangStandard::lang_openclcpp) ++ .Default(LangStandard::lang_unspecified); + + if (OpenCLLangStd == LangStandard::lang_unspecified) { + Diags.Report(diag::err_drv_invalid_value) +@@ -2787,8 +2790,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, + + Opts.RTTI = Opts.CPlusPlus && !Args.hasArg(OPT_fno_rtti); + Opts.RTTIData = Opts.RTTI && !Args.hasArg(OPT_fno_rtti_data); +- Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL +- && Opts.OpenCLVersion == 200); ++ Opts.Blocks = Args.hasArg(OPT_fblocks); + Opts.BlocksRuntimeOptional = Args.hasArg(OPT_fblocks_runtime_optional); + Opts.Coroutines = Opts.CPlusPlus2a || Args.hasArg(OPT_fcoroutines_ts); + +diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp +index c273cb96d9b9..aefd208e6cd3 100644 +--- a/clang/lib/Frontend/InitPreprocessor.cpp ++++ b/clang/lib/Frontend/InitPreprocessor.cpp +@@ -445,6 +445,9 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, + case 200: + Builder.defineMacro("__OPENCL_C_VERSION__", "200"); + break; ++ case 300: ++ Builder.defineMacro("__OPENCL_C_VERSION__", "300"); ++ break; + 
default: + llvm_unreachable("Unsupported OpenCL version"); + } +@@ -453,6 +456,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, + Builder.defineMacro("CL_VERSION_1_1", "110"); + Builder.defineMacro("CL_VERSION_1_2", "120"); + Builder.defineMacro("CL_VERSION_2_0", "200"); ++ Builder.defineMacro("CL_VERSION_3_0", "300"); + + if (TI.isLittleEndian()) + Builder.defineMacro("__ENDIAN_LITTLE__"); +@@ -1101,7 +1105,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, + // OpenCL definitions. + if (LangOpts.OpenCL) { + #define OPENCLEXT(Ext) \ +- if (TI.getSupportedOpenCLOpts().isSupported(#Ext, LangOpts)) \ ++ if (TI.getSupportedOpenCLOpts().isSupported(#Ext)) \ + Builder.defineMacro(#Ext); + #include "clang/Basic/OpenCLExtensions.def" + +diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h +index 430e07d36f62..2cc688ccc3da 100644 +--- a/clang/lib/Headers/opencl-c-base.h ++++ b/clang/lib/Headers/opencl-c-base.h +@@ -9,6 +9,59 @@ + #ifndef _OPENCL_BASE_H_ + #define _OPENCL_BASE_H_ + ++// Add predefined macros to build headers with standalone executable ++#ifndef CL_VERSION_3_0 ++ #define CL_VERSION_3_0 300 ++#endif ++ ++// Define features for 2.0 for header backward compatibility ++#ifndef __opencl_c_int64 ++ #define __opencl_c_int64 1 ++#endif ++#if __OPENCL_C_VERSION__ != CL_VERSION_3_0 ++ #ifndef __opencl_c_images ++ #define __opencl_c_images 1 ++ #endif ++#endif ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) ++#ifndef __opencl_c_pipes ++ #define __opencl_c_pipes 1 ++#endif ++#ifndef __opencl_c_generic_address_space ++ #define __opencl_c_generic_address_space 1 ++#endif ++#ifndef __opencl_c_work_group_collective_functions ++ #define __opencl_c_work_group_collective_functions 1 ++#endif ++#ifndef __opencl_c_atomic_order_acq_rel ++ #define __opencl_c_atomic_order_acq_rel 1 ++#endif ++#ifndef __opencl_c_atomic_order_seq_cst ++ #define __opencl_c_atomic_order_seq_cst 1 
++#endif ++#ifndef __opencl_c_atomic_scope_device ++ #define __opencl_c_atomic_scope_device 1 ++#endif ++#ifndef __opencl_c_atomic_scope_all_devices ++ #define __opencl_c_atomic_scope_all_devices 1 ++#endif ++#ifndef __opencl_c_subgroups ++ #define __opencl_c_subgroups 1 ++#endif ++#ifndef __opencl_c_3d_image_writes ++ #define __opencl_c_3d_image_writes 1 ++#endif ++#ifndef __opencl_c_device_enqueue ++ #define __opencl_c_device_enqueue 1 ++#endif ++#ifndef __opencl_c_read_write_images ++ #define __opencl_c_read_write_images 1 ++#endif ++#ifndef __opencl_c_program_scope_global_variables ++ #define __opencl_c_program_scope_global_variables 1 ++#endif ++#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) ++ + // built-in scalar data types: + + /** +@@ -115,7 +168,12 @@ typedef half half4 __attribute__((ext_vector_type(4))); + typedef half half8 __attribute__((ext_vector_type(8))); + typedef half half16 __attribute__((ext_vector_type(16))); + #endif +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++ ++#ifndef __opencl_c_fp64 ++ #define __opencl_c_fp64 1 ++#endif ++ + #if __OPENCL_C_VERSION__ < CL_VERSION_1_2 + #pragma OPENCL EXTENSION cl_khr_fp64 : enable + #endif +@@ -281,9 +339,17 @@ typedef uint cl_mem_fence_flags; + typedef enum memory_scope { + memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, ++#ifdef __opencl_c_atomic_scope_device + memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, ++#endif ++#ifdef __opencl_c_atomic_scope_all_devices ++ #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ memory_scope_all_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, ++ #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, +-#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) ++#endif ++#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \ ++ 
defined(__opencl_c_subgroups) + memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP + #endif + } memory_scope; +@@ -301,13 +367,14 @@ typedef enum memory_scope { + #define ATOMIC_FLAG_INIT 0 + + // enum values aligned with what clang uses in EmitAtomicExpr() +-typedef enum memory_order +-{ ++typedef enum memory_order { + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, ++#ifdef __opencl_c_atomic_order_seq_cst + memory_order_seq_cst = __ATOMIC_SEQ_CST ++#endif //__opencl_c_atomic_order_seq_cst + } memory_order; + + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h +index 3210f93cc851..93a946cec5b1 100644 +--- a/clang/lib/Headers/opencl-c.h ++++ b/clang/lib/Headers/opencl-c.h +@@ -35,7 +35,6 @@ + #define __purefn __attribute__((pure)) + #define __cnfn __attribute__((const)) + +- + // OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions + + char __ovld __cnfn convert_char_rte(char); +@@ -4632,7 +4631,7 @@ float16 __ovld __cnfn convert_float16(float16); + + // Conversions with double data type parameters or return value. + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + char __ovld __cnfn convert_char(double); + char __ovld __cnfn convert_char_rte(double); + char __ovld __cnfn convert_char_rtn(double); +@@ -5452,7 +5451,7 @@ double16 __ovld __cnfn convert_double16_rtz(uchar16); + double16 __ovld __cnfn convert_double16_rtz(uint16); + double16 __ovld __cnfn convert_double16_rtz(ulong16); + double16 __ovld __cnfn convert_double16_rtz(ushort16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + // Convert half types to non-double types. 
+@@ -6270,7 +6269,7 @@ half16 __ovld __cnfn convert_half16_rtz(float16); + half16 __ovld __cnfn convert_half16_rtz(half16); + + // Convert half types to double types. +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn convert_double(half); + double __ovld __cnfn convert_double_rte(half); + double __ovld __cnfn convert_double_rtp(half); +@@ -6333,7 +6332,7 @@ half16 __ovld __cnfn convert_half16_rte(double16); + half16 __ovld __cnfn convert_half16_rtp(double16); + half16 __ovld __cnfn convert_half16_rtn(double16); + half16 __ovld __cnfn convert_half16_rtz(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #endif // cl_khr_fp16 + +@@ -6404,14 +6403,14 @@ half16 __ovld __cnfn convert_half16_rtz(double16); + #define as_float8(x) __builtin_astype((x), float8) + #define as_float16(x) __builtin_astype((x), float16) + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #define as_double(x) __builtin_astype((x), double) + #define as_double2(x) __builtin_astype((x), double2) + #define as_double3(x) __builtin_astype((x), double3) + #define as_double4(x) __builtin_astype((x), double4) + #define as_double8(x) __builtin_astype((x), double8) + #define as_double16(x) __builtin_astype((x), double16) +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + #define as_half(x) __builtin_astype((x), half) +@@ -6534,14 +6533,14 @@ float3 __ovld __cnfn acos(float3); + float4 __ovld __cnfn acos(float4); + float8 __ovld __cnfn acos(float8); + float16 __ovld __cnfn acos(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn acos(double); + double2 __ovld __cnfn acos(double2); + double3 __ovld __cnfn acos(double3); + double4 __ovld __cnfn acos(double4); + double8 __ovld __cnfn acos(double8); + double16 __ovld __cnfn acos(double16); +-#endif //cl_khr_fp64 ++#endif // 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn acos(half); + half2 __ovld __cnfn acos(half2); +@@ -6560,14 +6559,14 @@ float3 __ovld __cnfn acosh(float3); + float4 __ovld __cnfn acosh(float4); + float8 __ovld __cnfn acosh(float8); + float16 __ovld __cnfn acosh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn acosh(double); + double2 __ovld __cnfn acosh(double2); + double3 __ovld __cnfn acosh(double3); + double4 __ovld __cnfn acosh(double4); + double8 __ovld __cnfn acosh(double8); + double16 __ovld __cnfn acosh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn acosh(half); + half2 __ovld __cnfn acosh(half2); +@@ -6586,14 +6585,14 @@ float3 __ovld __cnfn acospi(float3 x); + float4 __ovld __cnfn acospi(float4 x); + float8 __ovld __cnfn acospi(float8 x); + float16 __ovld __cnfn acospi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn acospi(double x); + double2 __ovld __cnfn acospi(double2 x); + double3 __ovld __cnfn acospi(double3 x); + double4 __ovld __cnfn acospi(double4 x); + double8 __ovld __cnfn acospi(double8 x); + double16 __ovld __cnfn acospi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn acospi(half x); + half2 __ovld __cnfn acospi(half2 x); +@@ -6612,14 +6611,14 @@ float3 __ovld __cnfn asin(float3); + float4 __ovld __cnfn asin(float4); + float8 __ovld __cnfn asin(float8); + float16 __ovld __cnfn asin(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn asin(double); + double2 __ovld __cnfn asin(double2); + double3 __ovld __cnfn asin(double3); + double4 __ovld __cnfn asin(double4); + double8 __ovld __cnfn asin(double8); + double16 __ovld __cnfn asin(double16); 
+-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn asin(half); + half2 __ovld __cnfn asin(half2); +@@ -6638,14 +6637,14 @@ float3 __ovld __cnfn asinh(float3); + float4 __ovld __cnfn asinh(float4); + float8 __ovld __cnfn asinh(float8); + float16 __ovld __cnfn asinh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn asinh(double); + double2 __ovld __cnfn asinh(double2); + double3 __ovld __cnfn asinh(double3); + double4 __ovld __cnfn asinh(double4); + double8 __ovld __cnfn asinh(double8); + double16 __ovld __cnfn asinh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn asinh(half); + half2 __ovld __cnfn asinh(half2); +@@ -6664,14 +6663,14 @@ float3 __ovld __cnfn asinpi(float3 x); + float4 __ovld __cnfn asinpi(float4 x); + float8 __ovld __cnfn asinpi(float8 x); + float16 __ovld __cnfn asinpi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn asinpi(double x); + double2 __ovld __cnfn asinpi(double2 x); + double3 __ovld __cnfn asinpi(double3 x); + double4 __ovld __cnfn asinpi(double4 x); + double8 __ovld __cnfn asinpi(double8 x); + double16 __ovld __cnfn asinpi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn asinpi(half x); + half2 __ovld __cnfn asinpi(half2 x); +@@ -6690,14 +6689,14 @@ float3 __ovld __cnfn atan(float3 y_over_x); + float4 __ovld __cnfn atan(float4 y_over_x); + float8 __ovld __cnfn atan(float8 y_over_x); + float16 __ovld __cnfn atan(float16 y_over_x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atan(double y_over_x); + double2 __ovld __cnfn atan(double2 y_over_x); + double3 __ovld __cnfn atan(double3 y_over_x); + double4 __ovld __cnfn 
atan(double4 y_over_x); + double8 __ovld __cnfn atan(double8 y_over_x); + double16 __ovld __cnfn atan(double16 y_over_x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atan(half y_over_x); + half2 __ovld __cnfn atan(half2 y_over_x); +@@ -6716,14 +6715,14 @@ float3 __ovld __cnfn atan2(float3 y, float3 x); + float4 __ovld __cnfn atan2(float4 y, float4 x); + float8 __ovld __cnfn atan2(float8 y, float8 x); + float16 __ovld __cnfn atan2(float16 y, float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atan2(double y, double x); + double2 __ovld __cnfn atan2(double2 y, double2 x); + double3 __ovld __cnfn atan2(double3 y, double3 x); + double4 __ovld __cnfn atan2(double4 y, double4 x); + double8 __ovld __cnfn atan2(double8 y, double8 x); + double16 __ovld __cnfn atan2(double16 y, double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atan2(half y, half x); + half2 __ovld __cnfn atan2(half2 y, half2 x); +@@ -6742,14 +6741,14 @@ float3 __ovld __cnfn atanh(float3); + float4 __ovld __cnfn atanh(float4); + float8 __ovld __cnfn atanh(float8); + float16 __ovld __cnfn atanh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atanh(double); + double2 __ovld __cnfn atanh(double2); + double3 __ovld __cnfn atanh(double3); + double4 __ovld __cnfn atanh(double4); + double8 __ovld __cnfn atanh(double8); + double16 __ovld __cnfn atanh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atanh(half); + half2 __ovld __cnfn atanh(half2); +@@ -6768,14 +6767,14 @@ float3 __ovld __cnfn atanpi(float3 x); + float4 __ovld __cnfn atanpi(float4 x); + float8 __ovld __cnfn atanpi(float8 x); + float16 __ovld __cnfn atanpi(float16 x); +-#ifdef 
cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atanpi(double x); + double2 __ovld __cnfn atanpi(double2 x); + double3 __ovld __cnfn atanpi(double3 x); + double4 __ovld __cnfn atanpi(double4 x); + double8 __ovld __cnfn atanpi(double8 x); + double16 __ovld __cnfn atanpi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atanpi(half x); + half2 __ovld __cnfn atanpi(half2 x); +@@ -6794,14 +6793,14 @@ float3 __ovld __cnfn atan2pi(float3 y, float3 x); + float4 __ovld __cnfn atan2pi(float4 y, float4 x); + float8 __ovld __cnfn atan2pi(float8 y, float8 x); + float16 __ovld __cnfn atan2pi(float16 y, float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atan2pi(double y, double x); + double2 __ovld __cnfn atan2pi(double2 y, double2 x); + double3 __ovld __cnfn atan2pi(double3 y, double3 x); + double4 __ovld __cnfn atan2pi(double4 y, double4 x); + double8 __ovld __cnfn atan2pi(double8 y, double8 x); + double16 __ovld __cnfn atan2pi(double16 y, double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atan2pi(half y, half x); + half2 __ovld __cnfn atan2pi(half2 y, half2 x); +@@ -6820,14 +6819,14 @@ float3 __ovld __cnfn cbrt(float3); + float4 __ovld __cnfn cbrt(float4); + float8 __ovld __cnfn cbrt(float8); + float16 __ovld __cnfn cbrt(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn cbrt(double); + double2 __ovld __cnfn cbrt(double2); + double3 __ovld __cnfn cbrt(double3); + double4 __ovld __cnfn cbrt(double4); + double8 __ovld __cnfn cbrt(double8); + double16 __ovld __cnfn cbrt(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn cbrt(half); + half2 __ovld __cnfn 
cbrt(half2); +@@ -6847,14 +6846,14 @@ float3 __ovld __cnfn ceil(float3); + float4 __ovld __cnfn ceil(float4); + float8 __ovld __cnfn ceil(float8); + float16 __ovld __cnfn ceil(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn ceil(double); + double2 __ovld __cnfn ceil(double2); + double3 __ovld __cnfn ceil(double3); + double4 __ovld __cnfn ceil(double4); + double8 __ovld __cnfn ceil(double8); + double16 __ovld __cnfn ceil(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn ceil(half); + half2 __ovld __cnfn ceil(half2); +@@ -6873,14 +6872,14 @@ float3 __ovld __cnfn copysign(float3 x, float3 y); + float4 __ovld __cnfn copysign(float4 x, float4 y); + float8 __ovld __cnfn copysign(float8 x, float8 y); + float16 __ovld __cnfn copysign(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn copysign(double x, double y); + double2 __ovld __cnfn copysign(double2 x, double2 y); + double3 __ovld __cnfn copysign(double3 x, double3 y); + double4 __ovld __cnfn copysign(double4 x, double4 y); + double8 __ovld __cnfn copysign(double8 x, double8 y); + double16 __ovld __cnfn copysign(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn copysign(half x, half y); + half2 __ovld __cnfn copysign(half2 x, half2 y); +@@ -6899,14 +6898,14 @@ float3 __ovld __cnfn cos(float3); + float4 __ovld __cnfn cos(float4); + float8 __ovld __cnfn cos(float8); + float16 __ovld __cnfn cos(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn cos(double); + double2 __ovld __cnfn cos(double2); + double3 __ovld __cnfn cos(double3); + double4 __ovld __cnfn cos(double4); + double8 __ovld __cnfn cos(double8); + double16 __ovld __cnfn cos(double16); 
+-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn cos(half); + half2 __ovld __cnfn cos(half2); +@@ -6925,14 +6924,14 @@ float3 __ovld __cnfn cosh(float3); + float4 __ovld __cnfn cosh(float4); + float8 __ovld __cnfn cosh(float8); + float16 __ovld __cnfn cosh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn cosh(double); + double2 __ovld __cnfn cosh(double2); + double3 __ovld __cnfn cosh(double3); + double4 __ovld __cnfn cosh(double4); + double8 __ovld __cnfn cosh(double8); + double16 __ovld __cnfn cosh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn cosh(half); + half2 __ovld __cnfn cosh(half2); +@@ -6951,14 +6950,14 @@ float3 __ovld __cnfn cospi(float3 x); + float4 __ovld __cnfn cospi(float4 x); + float8 __ovld __cnfn cospi(float8 x); + float16 __ovld __cnfn cospi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn cospi(double x); + double2 __ovld __cnfn cospi(double2 x); + double3 __ovld __cnfn cospi(double3 x); + double4 __ovld __cnfn cospi(double4 x); + double8 __ovld __cnfn cospi(double8 x); + double16 __ovld __cnfn cospi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn cospi(half x); + half2 __ovld __cnfn cospi(half2 x); +@@ -6977,14 +6976,14 @@ float3 __ovld __cnfn erfc(float3); + float4 __ovld __cnfn erfc(float4); + float8 __ovld __cnfn erfc(float8); + float16 __ovld __cnfn erfc(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn erfc(double); + double2 __ovld __cnfn erfc(double2); + double3 __ovld __cnfn erfc(double3); + double4 __ovld __cnfn erfc(double4); + double8 __ovld __cnfn erfc(double8); + double16 __ovld __cnfn 
erfc(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn erfc(half); + half2 __ovld __cnfn erfc(half2); +@@ -7004,14 +7003,14 @@ float3 __ovld __cnfn erf(float3); + float4 __ovld __cnfn erf(float4); + float8 __ovld __cnfn erf(float8); + float16 __ovld __cnfn erf(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn erf(double); + double2 __ovld __cnfn erf(double2); + double3 __ovld __cnfn erf(double3); + double4 __ovld __cnfn erf(double4); + double8 __ovld __cnfn erf(double8); + double16 __ovld __cnfn erf(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn erf(half); + half2 __ovld __cnfn erf(half2); +@@ -7030,14 +7029,14 @@ float3 __ovld __cnfn exp(float3 x); + float4 __ovld __cnfn exp(float4 x); + float8 __ovld __cnfn exp(float8 x); + float16 __ovld __cnfn exp(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn exp(double x); + double2 __ovld __cnfn exp(double2 x); + double3 __ovld __cnfn exp(double3 x); + double4 __ovld __cnfn exp(double4 x); + double8 __ovld __cnfn exp(double8 x); + double16 __ovld __cnfn exp(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn exp(half x); + half2 __ovld __cnfn exp(half2 x); +@@ -7056,14 +7055,14 @@ float3 __ovld __cnfn exp2(float3); + float4 __ovld __cnfn exp2(float4); + float8 __ovld __cnfn exp2(float8); + float16 __ovld __cnfn exp2(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn exp2(double); + double2 __ovld __cnfn exp2(double2); + double3 __ovld __cnfn exp2(double3); + double4 __ovld __cnfn exp2(double4); + double8 __ovld __cnfn exp2(double8); + double16 __ovld __cnfn exp2(double16); +-#endif 
//cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn exp2(half); + half2 __ovld __cnfn exp2(half2); +@@ -7082,14 +7081,14 @@ float3 __ovld __cnfn exp10(float3); + float4 __ovld __cnfn exp10(float4); + float8 __ovld __cnfn exp10(float8); + float16 __ovld __cnfn exp10(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn exp10(double); + double2 __ovld __cnfn exp10(double2); + double3 __ovld __cnfn exp10(double3); + double4 __ovld __cnfn exp10(double4); + double8 __ovld __cnfn exp10(double8); + double16 __ovld __cnfn exp10(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn exp10(half); + half2 __ovld __cnfn exp10(half2); +@@ -7108,14 +7107,14 @@ float3 __ovld __cnfn expm1(float3 x); + float4 __ovld __cnfn expm1(float4 x); + float8 __ovld __cnfn expm1(float8 x); + float16 __ovld __cnfn expm1(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn expm1(double x); + double2 __ovld __cnfn expm1(double2 x); + double3 __ovld __cnfn expm1(double3 x); + double4 __ovld __cnfn expm1(double4 x); + double8 __ovld __cnfn expm1(double8 x); + double16 __ovld __cnfn expm1(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn expm1(half x); + half2 __ovld __cnfn expm1(half2 x); +@@ -7134,14 +7133,14 @@ float3 __ovld __cnfn fabs(float3); + float4 __ovld __cnfn fabs(float4); + float8 __ovld __cnfn fabs(float8); + float16 __ovld __cnfn fabs(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fabs(double); + double2 __ovld __cnfn fabs(double2); + double3 __ovld __cnfn fabs(double3); + double4 __ovld __cnfn fabs(double4); + double8 __ovld __cnfn fabs(double8); + double16 __ovld __cnfn 
fabs(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fabs(half); + half2 __ovld __cnfn fabs(half2); +@@ -7160,14 +7159,14 @@ float3 __ovld __cnfn fdim(float3 x, float3 y); + float4 __ovld __cnfn fdim(float4 x, float4 y); + float8 __ovld __cnfn fdim(float8 x, float8 y); + float16 __ovld __cnfn fdim(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fdim(double x, double y); + double2 __ovld __cnfn fdim(double2 x, double2 y); + double3 __ovld __cnfn fdim(double3 x, double3 y); + double4 __ovld __cnfn fdim(double4 x, double4 y); + double8 __ovld __cnfn fdim(double8 x, double8 y); + double16 __ovld __cnfn fdim(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fdim(half x, half y); + half2 __ovld __cnfn fdim(half2 x, half2 y); +@@ -7187,14 +7186,14 @@ float3 __ovld __cnfn floor(float3); + float4 __ovld __cnfn floor(float4); + float8 __ovld __cnfn floor(float8); + float16 __ovld __cnfn floor(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn floor(double); + double2 __ovld __cnfn floor(double2); + double3 __ovld __cnfn floor(double3); + double4 __ovld __cnfn floor(double4); + double8 __ovld __cnfn floor(double8); + double16 __ovld __cnfn floor(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn floor(half); + half2 __ovld __cnfn floor(half2); +@@ -7217,14 +7216,14 @@ float3 __ovld __cnfn fma(float3 a, float3 b, float3 c); + float4 __ovld __cnfn fma(float4 a, float4 b, float4 c); + float8 __ovld __cnfn fma(float8 a, float8 b, float8 c); + float16 __ovld __cnfn fma(float16 a, float16 b, float16 c); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + 
double __ovld __cnfn fma(double a, double b, double c); + double2 __ovld __cnfn fma(double2 a, double2 b, double2 c); + double3 __ovld __cnfn fma(double3 a, double3 b, double3 c); + double4 __ovld __cnfn fma(double4 a, double4 b, double4 c); + double8 __ovld __cnfn fma(double8 a, double8 b, double8 c); + double16 __ovld __cnfn fma(double16 a, double16 b, double16 c); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fma(half a, half b, half c); + half2 __ovld __cnfn fma(half2 a, half2 b, half2 c); +@@ -7251,7 +7250,7 @@ float3 __ovld __cnfn fmax(float3 x, float y); + float4 __ovld __cnfn fmax(float4 x, float y); + float8 __ovld __cnfn fmax(float8 x, float y); + float16 __ovld __cnfn fmax(float16 x, float y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fmax(double x, double y); + double2 __ovld __cnfn fmax(double2 x, double2 y); + double3 __ovld __cnfn fmax(double3 x, double3 y); +@@ -7263,7 +7262,7 @@ double3 __ovld __cnfn fmax(double3 x, double y); + double4 __ovld __cnfn fmax(double4 x, double y); + double8 __ovld __cnfn fmax(double8 x, double y); + double16 __ovld __cnfn fmax(double16 x, double y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fmax(half x, half y); + half2 __ovld __cnfn fmax(half2 x, half2 y); +@@ -7295,7 +7294,7 @@ float3 __ovld __cnfn fmin(float3 x, float y); + float4 __ovld __cnfn fmin(float4 x, float y); + float8 __ovld __cnfn fmin(float8 x, float y); + float16 __ovld __cnfn fmin(float16 x, float y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fmin(double x, double y); + double2 __ovld __cnfn fmin(double2 x, double2 y); + double3 __ovld __cnfn fmin(double3 x, double3 y); +@@ -7307,7 +7306,7 @@ double3 __ovld __cnfn fmin(double3 x, double y); + double4 __ovld __cnfn fmin(double4 x, 
double y); + double8 __ovld __cnfn fmin(double8 x, double y); + double16 __ovld __cnfn fmin(double16 x, double y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fmin(half x, half y); + half2 __ovld __cnfn fmin(half2 x, half2 y); +@@ -7331,14 +7330,14 @@ float3 __ovld __cnfn fmod(float3 x, float3 y); + float4 __ovld __cnfn fmod(float4 x, float4 y); + float8 __ovld __cnfn fmod(float8 x, float8 y); + float16 __ovld __cnfn fmod(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fmod(double x, double y); + double2 __ovld __cnfn fmod(double2 x, double2 y); + double3 __ovld __cnfn fmod(double3 x, double3 y); + double4 __ovld __cnfn fmod(double4 x, double4 y); + double8 __ovld __cnfn fmod(double8 x, double8 y); + double16 __ovld __cnfn fmod(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fmod(half x, half y); + half2 __ovld __cnfn fmod(half2 x, half2 y); +@@ -7352,21 +7351,21 @@ half16 __ovld __cnfn fmod(half16 x, half16 y); + * Returns fmin(x - floor (x), 0x1.fffffep-1f ). + * floor(x) is returned in iptr. 
+ */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld fract(float x, float *iptr); + float2 __ovld fract(float2 x, float2 *iptr); + float3 __ovld fract(float3 x, float3 *iptr); + float4 __ovld fract(float4 x, float4 *iptr); + float8 __ovld fract(float8 x, float8 *iptr); + float16 __ovld fract(float16 x, float16 *iptr); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld fract(double x, double *iptr); + double2 __ovld fract(double2 x, double2 *iptr); + double3 __ovld fract(double3 x, double3 *iptr); + double4 __ovld fract(double4 x, double4 *iptr); + double8 __ovld fract(double8 x, double8 *iptr); + double16 __ovld fract(double16 x, double16 *iptr); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld fract(half x, half *iptr); + half2 __ovld fract(half2 x, half2 *iptr); +@@ -7375,7 +7374,9 @@ half4 __ovld fract(half4 x, half4 *iptr); + half8 __ovld fract(half8 x, half8 *iptr); + half16 __ovld fract(half16 x, half16 *iptr); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld fract(float x, __global float *iptr); + float2 __ovld fract(float2 x, __global float2 *iptr); + float3 __ovld fract(float3 x, __global float3 *iptr); +@@ -7394,7 +7395,7 @@ float3 __ovld fract(float3 x, __private float3 *iptr); + float4 __ovld fract(float4 x, __private float4 *iptr); + float8 __ovld fract(float8 x, __private float8 *iptr); + float16 __ovld fract(float16 x, __private float16 *iptr); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld fract(double x, __global double *iptr); + double2 __ovld fract(double2 x, __global double2 *iptr); + double3 __ovld fract(double3 x, __global double3 *iptr); +@@ -7413,7 +7414,7 @@ double3 __ovld 
fract(double3 x, __private double3 *iptr); + double4 __ovld fract(double4 x, __private double4 *iptr); + double8 __ovld fract(double8 x, __private double8 *iptr); + double16 __ovld fract(double16 x, __private double16 *iptr); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld fract(half x, __global half *iptr); + half2 __ovld fract(half2 x, __global half2 *iptr); +@@ -7434,29 +7435,29 @@ half4 __ovld fract(half4 x, __private half4 *iptr); + half8 __ovld fract(half8 x, __private half8 *iptr); + half16 __ovld fract(half16 x, __private half16 *iptr); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +- ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + /** + * Extract mantissa and exponent from x. For each + * component the mantissa returned is a float with + * magnitude in the interval [1/2, 1) or 0. Each + * component of x equals mantissa returned * 2^exp. 
+ */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld frexp(float x, int *exp); + float2 __ovld frexp(float2 x, int2 *exp); + float3 __ovld frexp(float3 x, int3 *exp); + float4 __ovld frexp(float4 x, int4 *exp); + float8 __ovld frexp(float8 x, int8 *exp); + float16 __ovld frexp(float16 x, int16 *exp); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld frexp(double x, int *exp); + double2 __ovld frexp(double2 x, int2 *exp); + double3 __ovld frexp(double3 x, int3 *exp); + double4 __ovld frexp(double4 x, int4 *exp); + double8 __ovld frexp(double8 x, int8 *exp); + double16 __ovld frexp(double16 x, int16 *exp); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld frexp(half x, int *exp); + half2 __ovld frexp(half2 x, int2 *exp); +@@ -7465,7 +7466,9 @@ half4 __ovld frexp(half4 x, int4 *exp); + half8 __ovld frexp(half8 x, int8 *exp); + half16 __ovld frexp(half16 x, int16 *exp); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld frexp(float x, __global int *exp); + float2 __ovld frexp(float2 x, __global int2 *exp); + float3 __ovld frexp(float3 x, __global int3 *exp); +@@ -7484,7 +7487,7 @@ float3 __ovld frexp(float3 x, __private int3 *exp); + float4 __ovld frexp(float4 x, __private int4 *exp); + float8 __ovld frexp(float8 x, __private int8 *exp); + float16 __ovld frexp(float16 x, __private int16 *exp); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld frexp(double x, __global int *exp); + double2 __ovld frexp(double2 x, __global int2 *exp); + double3 __ovld frexp(double3 x, __global int3 *exp); +@@ -7503,7 +7506,7 @@ double3 __ovld frexp(double3 x, __private int3 *exp); + double4 __ovld frexp(double4 x, __private 
int4 *exp); + double8 __ovld frexp(double8 x, __private int8 *exp); + double16 __ovld frexp(double16 x, __private int16 *exp); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld frexp(half x, __global int *exp); + half2 __ovld frexp(half2 x, __global int2 *exp); +@@ -7524,7 +7527,8 @@ half4 __ovld frexp(half4 x, __private int4 *exp); + half8 __ovld frexp(half8 x, __private int8 *exp); + half16 __ovld frexp(half16 x, __private int16 *exp); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + + /** + * Compute the value of the square root of x^2 + y^2 +@@ -7536,14 +7540,14 @@ float3 __ovld __cnfn hypot(float3 x, float3 y); + float4 __ovld __cnfn hypot(float4 x, float4 y); + float8 __ovld __cnfn hypot(float8 x, float8 y); + float16 __ovld __cnfn hypot(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn hypot(double x, double y); + double2 __ovld __cnfn hypot(double2 x, double2 y); + double3 __ovld __cnfn hypot(double3 x, double3 y); + double4 __ovld __cnfn hypot(double4 x, double4 y); + double8 __ovld __cnfn hypot(double8 x, double8 y); + double16 __ovld __cnfn hypot(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn hypot(half x, half y); + half2 __ovld __cnfn hypot(half2 x, half2 y); +@@ -7562,14 +7566,14 @@ int3 __ovld __cnfn ilogb(float3 x); + int4 __ovld __cnfn ilogb(float4 x); + int8 __ovld __cnfn ilogb(float8 x); + int16 __ovld __cnfn ilogb(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn ilogb(double x); + int2 __ovld __cnfn ilogb(double2 x); + int3 __ovld __cnfn ilogb(double3 x); + int4 __ovld __cnfn ilogb(double4 
x); + int8 __ovld __cnfn ilogb(double8 x); + int16 __ovld __cnfn ilogb(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn ilogb(half x); + int2 __ovld __cnfn ilogb(half2 x); +@@ -7593,7 +7597,7 @@ float3 __ovld __cnfn ldexp(float3 x, int n); + float4 __ovld __cnfn ldexp(float4 x, int n); + float8 __ovld __cnfn ldexp(float8 x, int n); + float16 __ovld __cnfn ldexp(float16 x, int n); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn ldexp(double x, int n); + double2 __ovld __cnfn ldexp(double2 x, int2 n); + double3 __ovld __cnfn ldexp(double3 x, int3 n); +@@ -7605,7 +7609,7 @@ double3 __ovld __cnfn ldexp(double3 x, int n); + double4 __ovld __cnfn ldexp(double4 x, int n); + double8 __ovld __cnfn ldexp(double8 x, int n); + double16 __ovld __cnfn ldexp(double16 x, int n); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn ldexp(half x, int n); + half2 __ovld __cnfn ldexp(half2 x, int2 n); +@@ -7632,14 +7636,14 @@ float3 __ovld __cnfn lgamma(float3 x); + float4 __ovld __cnfn lgamma(float4 x); + float8 __ovld __cnfn lgamma(float8 x); + float16 __ovld __cnfn lgamma(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn lgamma(double x); + double2 __ovld __cnfn lgamma(double2 x); + double3 __ovld __cnfn lgamma(double3 x); + double4 __ovld __cnfn lgamma(double4 x); + double8 __ovld __cnfn lgamma(double8 x); + double16 __ovld __cnfn lgamma(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn lgamma(half x); + half2 __ovld __cnfn lgamma(half2 x); +@@ -7649,21 +7653,21 @@ half8 __ovld __cnfn lgamma(half8 x); + half16 __ovld __cnfn lgamma(half16 x); + #endif //cl_khr_fp16 + +-#if defined(__OPENCL_CPP_VERSION__) || 
(__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld lgamma_r(float x, int *signp); + float2 __ovld lgamma_r(float2 x, int2 *signp); + float3 __ovld lgamma_r(float3 x, int3 *signp); + float4 __ovld lgamma_r(float4 x, int4 *signp); + float8 __ovld lgamma_r(float8 x, int8 *signp); + float16 __ovld lgamma_r(float16 x, int16 *signp); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld lgamma_r(double x, int *signp); + double2 __ovld lgamma_r(double2 x, int2 *signp); + double3 __ovld lgamma_r(double3 x, int3 *signp); + double4 __ovld lgamma_r(double4 x, int4 *signp); + double8 __ovld lgamma_r(double8 x, int8 *signp); + double16 __ovld lgamma_r(double16 x, int16 *signp); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld lgamma_r(half x, int *signp); + half2 __ovld lgamma_r(half2 x, int2 *signp); +@@ -7672,7 +7676,9 @@ half4 __ovld lgamma_r(half4 x, int4 *signp); + half8 __ovld lgamma_r(half8 x, int8 *signp); + half16 __ovld lgamma_r(half16 x, int16 *signp); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld lgamma_r(float x, __global int *signp); + float2 __ovld lgamma_r(float2 x, __global int2 *signp); + float3 __ovld lgamma_r(float3 x, __global int3 *signp); +@@ -7691,7 +7697,7 @@ float3 __ovld lgamma_r(float3 x, __private int3 *signp); + float4 __ovld lgamma_r(float4 x, __private int4 *signp); + float8 __ovld lgamma_r(float8 x, __private int8 *signp); + float16 __ovld lgamma_r(float16 x, __private int16 *signp); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld lgamma_r(double x, __global int *signp); + double2 __ovld lgamma_r(double2 x, __global int2 *signp); + double3 __ovld lgamma_r(double3 x, __global int3 *signp); +@@ -7710,7 +7716,7 @@ double3 
__ovld lgamma_r(double3 x, __private int3 *signp); + double4 __ovld lgamma_r(double4 x, __private int4 *signp); + double8 __ovld lgamma_r(double8 x, __private int8 *signp); + double16 __ovld lgamma_r(double16 x, __private int16 *signp); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld lgamma_r(half x, __global int *signp); + half2 __ovld lgamma_r(half2 x, __global int2 *signp); +@@ -7731,8 +7737,8 @@ half4 __ovld lgamma_r(half4 x, __private int4 *signp); + half8 __ovld lgamma_r(half8 x, __private int8 *signp); + half16 __ovld lgamma_r(half16 x, __private int16 *signp); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +- ++#endif // !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ // CL_VERSION_2_0) + /** + * Compute natural logarithm. + */ +@@ -7742,14 +7748,14 @@ float3 __ovld __cnfn log(float3); + float4 __ovld __cnfn log(float4); + float8 __ovld __cnfn log(float8); + float16 __ovld __cnfn log(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn log(double); + double2 __ovld __cnfn log(double2); + double3 __ovld __cnfn log(double3); + double4 __ovld __cnfn log(double4); + double8 __ovld __cnfn log(double8); + double16 __ovld __cnfn log(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn log(half); + half2 __ovld __cnfn log(half2); +@@ -7760,7 +7766,7 @@ half16 __ovld __cnfn log(half16); + #endif //cl_khr_fp16 + + /** +- * Compute a base 2 logarithm. 
++ * Compute a base 2 logarithm. + */ + float __ovld __cnfn log2(float); + float2 __ovld __cnfn log2(float2); +@@ -7768,14 +7774,14 @@ float3 __ovld __cnfn log2(float3); + float4 __ovld __cnfn log2(float4); + float8 __ovld __cnfn log2(float8); + float16 __ovld __cnfn log2(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn log2(double); + double2 __ovld __cnfn log2(double2); + double3 __ovld __cnfn log2(double3); + double4 __ovld __cnfn log2(double4); + double8 __ovld __cnfn log2(double8); + double16 __ovld __cnfn log2(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn log2(half); + half2 __ovld __cnfn log2(half2); +@@ -7794,14 +7800,14 @@ float3 __ovld __cnfn log10(float3); + float4 __ovld __cnfn log10(float4); + float8 __ovld __cnfn log10(float8); + float16 __ovld __cnfn log10(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn log10(double); + double2 __ovld __cnfn log10(double2); + double3 __ovld __cnfn log10(double3); + double4 __ovld __cnfn log10(double4); + double8 __ovld __cnfn log10(double8); + double16 __ovld __cnfn log10(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn log10(half); + half2 __ovld __cnfn log10(half2); +@@ -7820,14 +7826,14 @@ float3 __ovld __cnfn log1p(float3 x); + float4 __ovld __cnfn log1p(float4 x); + float8 __ovld __cnfn log1p(float8 x); + float16 __ovld __cnfn log1p(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn log1p(double x); + double2 __ovld __cnfn log1p(double2 x); + double3 __ovld __cnfn log1p(double3 x); + double4 __ovld __cnfn log1p(double4 x); + double8 __ovld __cnfn log1p(double8 x); + double16 __ovld __cnfn log1p(double16 x); +-#endif //cl_khr_fp64 ++#endif // 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn log1p(half x); + half2 __ovld __cnfn log1p(half2 x); +@@ -7847,14 +7853,14 @@ float3 __ovld __cnfn logb(float3 x); + float4 __ovld __cnfn logb(float4 x); + float8 __ovld __cnfn logb(float8 x); + float16 __ovld __cnfn logb(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn logb(double x); + double2 __ovld __cnfn logb(double2 x); + double3 __ovld __cnfn logb(double3 x); + double4 __ovld __cnfn logb(double4 x); + double8 __ovld __cnfn logb(double8 x); + double16 __ovld __cnfn logb(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn logb(half x); + half2 __ovld __cnfn logb(half2 x); +@@ -7877,14 +7883,14 @@ float3 __ovld __cnfn mad(float3 a, float3 b, float3 c); + float4 __ovld __cnfn mad(float4 a, float4 b, float4 c); + float8 __ovld __cnfn mad(float8 a, float8 b, float8 c); + float16 __ovld __cnfn mad(float16 a, float16 b, float16 c); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn mad(double a, double b, double c); + double2 __ovld __cnfn mad(double2 a, double2 b, double2 c); + double3 __ovld __cnfn mad(double3 a, double3 b, double3 c); + double4 __ovld __cnfn mad(double4 a, double4 b, double4 c); + double8 __ovld __cnfn mad(double8 a, double8 b, double8 c); + double16 __ovld __cnfn mad(double16 a, double16 b, double16 c); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn mad(half a, half b, half c); + half2 __ovld __cnfn mad(half2 a, half2 b, half2 c); +@@ -7904,14 +7910,14 @@ float3 __ovld __cnfn maxmag(float3 x, float3 y); + float4 __ovld __cnfn maxmag(float4 x, float4 y); + float8 __ovld __cnfn maxmag(float8 x, float8 y); + float16 __ovld __cnfn maxmag(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn maxmag(double x, double y); + double2 __ovld __cnfn maxmag(double2 x, double2 y); + double3 __ovld __cnfn maxmag(double3 x, double3 y); + double4 __ovld __cnfn maxmag(double4 x, double4 y); + double8 __ovld __cnfn maxmag(double8 x, double8 y); + double16 __ovld __cnfn maxmag(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn maxmag(half x, half y); + half2 __ovld __cnfn maxmag(half2 x, half2 y); +@@ -7931,14 +7937,14 @@ float3 __ovld __cnfn minmag(float3 x, float3 y); + float4 __ovld __cnfn minmag(float4 x, float4 y); + float8 __ovld __cnfn minmag(float8 x, float8 y); + float16 __ovld __cnfn minmag(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn minmag(double x, double y); + double2 __ovld __cnfn minmag(double2 x, double2 y); + double3 __ovld __cnfn minmag(double3 x, double3 y); + double4 __ovld __cnfn minmag(double4 x, double4 y); + double8 __ovld __cnfn minmag(double8 x, double8 y); + double16 __ovld __cnfn minmag(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn minmag(half x, half y); + half2 __ovld __cnfn minmag(half2 x, half2 y); +@@ -7955,21 +7961,21 @@ half16 __ovld __cnfn minmag(half16 x, half16 y); + * the argument. It stores the integral part in the object + * pointed to by iptr. 
+ */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld modf(float x, float *iptr); + float2 __ovld modf(float2 x, float2 *iptr); + float3 __ovld modf(float3 x, float3 *iptr); + float4 __ovld modf(float4 x, float4 *iptr); + float8 __ovld modf(float8 x, float8 *iptr); + float16 __ovld modf(float16 x, float16 *iptr); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld modf(double x, double *iptr); + double2 __ovld modf(double2 x, double2 *iptr); + double3 __ovld modf(double3 x, double3 *iptr); + double4 __ovld modf(double4 x, double4 *iptr); + double8 __ovld modf(double8 x, double8 *iptr); + double16 __ovld modf(double16 x, double16 *iptr); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld modf(half x, half *iptr); + half2 __ovld modf(half2 x, half2 *iptr); +@@ -7978,7 +7984,9 @@ half4 __ovld modf(half4 x, half4 *iptr); + half8 __ovld modf(half8 x, half8 *iptr); + half16 __ovld modf(half16 x, half16 *iptr); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld modf(float x, __global float *iptr); + float2 __ovld modf(float2 x, __global float2 *iptr); + float3 __ovld modf(float3 x, __global float3 *iptr); +@@ -7997,7 +8005,7 @@ float3 __ovld modf(float3 x, __private float3 *iptr); + float4 __ovld modf(float4 x, __private float4 *iptr); + float8 __ovld modf(float8 x, __private float8 *iptr); + float16 __ovld modf(float16 x, __private float16 *iptr); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld modf(double x, __global double *iptr); + double2 __ovld modf(double2 x, __global double2 *iptr); + double3 __ovld modf(double3 x, __global double3 *iptr); +@@ -8016,7 +8024,7 @@ double3 __ovld modf(double3 x, __private 
double3 *iptr); + double4 __ovld modf(double4 x, __private double4 *iptr); + double8 __ovld modf(double8 x, __private double8 *iptr); + double16 __ovld modf(double16 x, __private double16 *iptr); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld modf(half x, __global half *iptr); + half2 __ovld modf(half2 x, __global half2 *iptr); +@@ -8037,7 +8045,8 @@ half4 __ovld modf(half4 x, __private half4 *iptr); + half8 __ovld modf(half8 x, __private half8 *iptr); + half16 __ovld modf(half16 x, __private half16 *iptr); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif // !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ // CL_VERSION_2_0) + + /** + * Returns a quiet NaN. The nancode may be placed +@@ -8049,14 +8058,14 @@ float3 __ovld __cnfn nan(uint3 nancode); + float4 __ovld __cnfn nan(uint4 nancode); + float8 __ovld __cnfn nan(uint8 nancode); + float16 __ovld __cnfn nan(uint16 nancode); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn nan(ulong nancode); + double2 __ovld __cnfn nan(ulong2 nancode); + double3 __ovld __cnfn nan(ulong3 nancode); + double4 __ovld __cnfn nan(ulong4 nancode); + double8 __ovld __cnfn nan(ulong8 nancode); + double16 __ovld __cnfn nan(ulong16 nancode); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn nan(ushort nancode); + half2 __ovld __cnfn nan(ushort2 nancode); +@@ -8079,14 +8088,14 @@ float3 __ovld __cnfn nextafter(float3 x, float3 y); + float4 __ovld __cnfn nextafter(float4 x, float4 y); + float8 __ovld __cnfn nextafter(float8 x, float8 y); + float16 __ovld __cnfn nextafter(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn nextafter(double x, double y); + double2 __ovld __cnfn nextafter(double2 
x, double2 y); + double3 __ovld __cnfn nextafter(double3 x, double3 y); + double4 __ovld __cnfn nextafter(double4 x, double4 y); + double8 __ovld __cnfn nextafter(double8 x, double8 y); + double16 __ovld __cnfn nextafter(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn nextafter(half x, half y); + half2 __ovld __cnfn nextafter(half2 x, half2 y); +@@ -8105,14 +8114,14 @@ float3 __ovld __cnfn pow(float3 x, float3 y); + float4 __ovld __cnfn pow(float4 x, float4 y); + float8 __ovld __cnfn pow(float8 x, float8 y); + float16 __ovld __cnfn pow(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn pow(double x, double y); + double2 __ovld __cnfn pow(double2 x, double2 y); + double3 __ovld __cnfn pow(double3 x, double3 y); + double4 __ovld __cnfn pow(double4 x, double4 y); + double8 __ovld __cnfn pow(double8 x, double8 y); + double16 __ovld __cnfn pow(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn pow(half x, half y); + half2 __ovld __cnfn pow(half2 x, half2 y); +@@ -8131,14 +8140,14 @@ float3 __ovld __cnfn pown(float3 x, int3 y); + float4 __ovld __cnfn pown(float4 x, int4 y); + float8 __ovld __cnfn pown(float8 x, int8 y); + float16 __ovld __cnfn pown(float16 x, int16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn pown(double x, int y); + double2 __ovld __cnfn pown(double2 x, int2 y); + double3 __ovld __cnfn pown(double3 x, int3 y); + double4 __ovld __cnfn pown(double4 x, int4 y); + double8 __ovld __cnfn pown(double8 x, int8 y); + double16 __ovld __cnfn pown(double16 x, int16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn pown(half x, int y); + half2 __ovld __cnfn 
pown(half2 x, int2 y); +@@ -8157,14 +8166,14 @@ float3 __ovld __cnfn powr(float3 x, float3 y); + float4 __ovld __cnfn powr(float4 x, float4 y); + float8 __ovld __cnfn powr(float8 x, float8 y); + float16 __ovld __cnfn powr(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn powr(double x, double y); + double2 __ovld __cnfn powr(double2 x, double2 y); + double3 __ovld __cnfn powr(double3 x, double3 y); + double4 __ovld __cnfn powr(double4 x, double4 y); + double8 __ovld __cnfn powr(double8 x, double8 y); + double16 __ovld __cnfn powr(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn powr(half x, half y); + half2 __ovld __cnfn powr(half2 x, half2 y); +@@ -8186,14 +8195,14 @@ float3 __ovld __cnfn remainder(float3 x, float3 y); + float4 __ovld __cnfn remainder(float4 x, float4 y); + float8 __ovld __cnfn remainder(float8 x, float8 y); + float16 __ovld __cnfn remainder(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn remainder(double x, double y); + double2 __ovld __cnfn remainder(double2 x, double2 y); + double3 __ovld __cnfn remainder(double3 x, double3 y); + double4 __ovld __cnfn remainder(double4 x, double4 y); + double8 __ovld __cnfn remainder(double8 x, double8 y); + double16 __ovld __cnfn remainder(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn remainder(half x, half y); + half2 __ovld __cnfn remainder(half2 x, half2 y); +@@ -8215,21 +8224,21 @@ half16 __ovld __cnfn remainder(half16 x, half16 y); + * sign as x/y. It stores this signed value in the object + * pointed to by quo. 
+ */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld remquo(float x, float y, int *quo); + float2 __ovld remquo(float2 x, float2 y, int2 *quo); + float3 __ovld remquo(float3 x, float3 y, int3 *quo); + float4 __ovld remquo(float4 x, float4 y, int4 *quo); + float8 __ovld remquo(float8 x, float8 y, int8 *quo); + float16 __ovld remquo(float16 x, float16 y, int16 *quo); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld remquo(double x, double y, int *quo); + double2 __ovld remquo(double2 x, double2 y, int2 *quo); + double3 __ovld remquo(double3 x, double3 y, int3 *quo); + double4 __ovld remquo(double4 x, double4 y, int4 *quo); + double8 __ovld remquo(double8 x, double8 y, int8 *quo); + double16 __ovld remquo(double16 x, double16 y, int16 *quo); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld remquo(half x, half y, int *quo); + half2 __ovld remquo(half2 x, half2 y, int2 *quo); +@@ -8237,9 +8246,10 @@ half3 __ovld remquo(half3 x, half3 y, int3 *quo); + half4 __ovld remquo(half4 x, half4 y, int4 *quo); + half8 __ovld remquo(half8 x, half8 y, int8 *quo); + half16 __ovld remquo(half16 x, half16 y, int16 *quo); +- + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld remquo(float x, float y, __global int *quo); + float2 __ovld remquo(float2 x, float2 y, __global int2 *quo); + float3 __ovld remquo(float3 x, float3 y, __global int3 *quo); +@@ -8258,7 +8268,7 @@ float3 __ovld remquo(float3 x, float3 y, __private int3 *quo); + float4 __ovld remquo(float4 x, float4 y, __private int4 *quo); + float8 __ovld remquo(float8 x, float8 y, __private int8 *quo); + float16 __ovld remquo(float16 x, float16 y, __private int16 *quo); +-#ifdef cl_khr_fp64 ++#if 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld remquo(double x, double y, __global int *quo); + double2 __ovld remquo(double2 x, double2 y, __global int2 *quo); + double3 __ovld remquo(double3 x, double3 y, __global int3 *quo); +@@ -8277,7 +8287,7 @@ double3 __ovld remquo(double3 x, double3 y, __private int3 *quo); + double4 __ovld remquo(double4 x, double4 y, __private int4 *quo); + double8 __ovld remquo(double8 x, double8 y, __private int8 *quo); + double16 __ovld remquo(double16 x, double16 y, __private int16 *quo); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld remquo(half x, half y, __global int *quo); + half2 __ovld remquo(half2 x, half2 y, __global int2 *quo); +@@ -8298,7 +8308,8 @@ half4 __ovld remquo(half4 x, half4 y, __private int4 *quo); + half8 __ovld remquo(half8 x, half8 y, __private int8 *quo); + half16 __ovld remquo(half16 x, half16 y, __private int16 *quo); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif // !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ // CL_VERSION_2_0) + /** + * Round to integral value (using round to nearest + * even rounding mode) in floating-point format. 
+@@ -8311,14 +8322,14 @@ float3 __ovld __cnfn rint(float3); + float4 __ovld __cnfn rint(float4); + float8 __ovld __cnfn rint(float8); + float16 __ovld __cnfn rint(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn rint(double); + double2 __ovld __cnfn rint(double2); + double3 __ovld __cnfn rint(double3); + double4 __ovld __cnfn rint(double4); + double8 __ovld __cnfn rint(double8); + double16 __ovld __cnfn rint(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn rint(half); + half2 __ovld __cnfn rint(half2); +@@ -8337,14 +8348,14 @@ float3 __ovld __cnfn rootn(float3 x, int3 y); + float4 __ovld __cnfn rootn(float4 x, int4 y); + float8 __ovld __cnfn rootn(float8 x, int8 y); + float16 __ovld __cnfn rootn(float16 x, int16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn rootn(double x, int y); + double2 __ovld __cnfn rootn(double2 x, int2 y); + double3 __ovld __cnfn rootn(double3 x, int3 y); + double4 __ovld __cnfn rootn(double4 x, int4 y); + double8 __ovld __cnfn rootn(double8 x, int8 y); + double16 __ovld __cnfn rootn(double16 x, int16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn rootn(half x, int y); + half2 __ovld __cnfn rootn(half2 x, int2 y); +@@ -8365,14 +8376,14 @@ float3 __ovld __cnfn round(float3 x); + float4 __ovld __cnfn round(float4 x); + float8 __ovld __cnfn round(float8 x); + float16 __ovld __cnfn round(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn round(double x); + double2 __ovld __cnfn round(double2 x); + double3 __ovld __cnfn round(double3 x); + double4 __ovld __cnfn round(double4 x); + double8 __ovld __cnfn round(double8 x); + double16 __ovld __cnfn round(double16 x); +-#endif //cl_khr_fp64 ++#endif // 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn round(half x); + half2 __ovld __cnfn round(half2 x); +@@ -8391,14 +8402,14 @@ float3 __ovld __cnfn rsqrt(float3); + float4 __ovld __cnfn rsqrt(float4); + float8 __ovld __cnfn rsqrt(float8); + float16 __ovld __cnfn rsqrt(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn rsqrt(double); + double2 __ovld __cnfn rsqrt(double2); + double3 __ovld __cnfn rsqrt(double3); + double4 __ovld __cnfn rsqrt(double4); + double8 __ovld __cnfn rsqrt(double8); + double16 __ovld __cnfn rsqrt(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn rsqrt(half); + half2 __ovld __cnfn rsqrt(half2); +@@ -8417,14 +8428,14 @@ float3 __ovld __cnfn sin(float3); + float4 __ovld __cnfn sin(float4); + float8 __ovld __cnfn sin(float8); + float16 __ovld __cnfn sin(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sin(double); + double2 __ovld __cnfn sin(double2); + double3 __ovld __cnfn sin(double3); + double4 __ovld __cnfn sin(double4); + double8 __ovld __cnfn sin(double8); + double16 __ovld __cnfn sin(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sin(half); + half2 __ovld __cnfn sin(half2); +@@ -8439,21 +8450,21 @@ half16 __ovld __cnfn sin(half16); + * is the return value and computed cosine is returned + * in cosval. 
+ */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld sincos(float x, float *cosval); + float2 __ovld sincos(float2 x, float2 *cosval); + float3 __ovld sincos(float3 x, float3 *cosval); + float4 __ovld sincos(float4 x, float4 *cosval); + float8 __ovld sincos(float8 x, float8 *cosval); + float16 __ovld sincos(float16 x, float16 *cosval); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld sincos(double x, double *cosval); + double2 __ovld sincos(double2 x, double2 *cosval); + double3 __ovld sincos(double3 x, double3 *cosval); + double4 __ovld sincos(double4 x, double4 *cosval); + double8 __ovld sincos(double8 x, double8 *cosval); + double16 __ovld sincos(double16 x, double16 *cosval); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld sincos(half x, half *cosval); + half2 __ovld sincos(half2 x, half2 *cosval); +@@ -8462,7 +8473,9 @@ half4 __ovld sincos(half4 x, half4 *cosval); + half8 __ovld sincos(half8 x, half8 *cosval); + half16 __ovld sincos(half16 x, half16 *cosval); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld sincos(float x, __global float *cosval); + float2 __ovld sincos(float2 x, __global float2 *cosval); + float3 __ovld sincos(float3 x, __global float3 *cosval); +@@ -8481,7 +8494,7 @@ float3 __ovld sincos(float3 x, __private float3 *cosval); + float4 __ovld sincos(float4 x, __private float4 *cosval); + float8 __ovld sincos(float8 x, __private float8 *cosval); + float16 __ovld sincos(float16 x, __private float16 *cosval); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld sincos(double x, __global double *cosval); + double2 __ovld sincos(double2 x, __global double2 *cosval); + double3 __ovld 
sincos(double3 x, __global double3 *cosval); +@@ -8500,7 +8513,7 @@ double3 __ovld sincos(double3 x, __private double3 *cosval); + double4 __ovld sincos(double4 x, __private double4 *cosval); + double8 __ovld sincos(double8 x, __private double8 *cosval); + double16 __ovld sincos(double16 x, __private double16 *cosval); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld sincos(half x, __global half *cosval); + half2 __ovld sincos(half2 x, __global half2 *cosval); +@@ -8521,8 +8534,8 @@ half4 __ovld sincos(half4 x, __private half4 *cosval); + half8 __ovld sincos(half8 x, __private half8 *cosval); + half16 __ovld sincos(half16 x, __private half16 *cosval); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +- ++#endif // !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ // CL_VERSION_2_0) + /** + * Compute hyperbolic sine. + */ +@@ -8532,14 +8545,14 @@ float3 __ovld __cnfn sinh(float3); + float4 __ovld __cnfn sinh(float4); + float8 __ovld __cnfn sinh(float8); + float16 __ovld __cnfn sinh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sinh(double); + double2 __ovld __cnfn sinh(double2); + double3 __ovld __cnfn sinh(double3); + double4 __ovld __cnfn sinh(double4); + double8 __ovld __cnfn sinh(double8); + double16 __ovld __cnfn sinh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sinh(half); + half2 __ovld __cnfn sinh(half2); +@@ -8558,14 +8571,14 @@ float3 __ovld __cnfn sinpi(float3 x); + float4 __ovld __cnfn sinpi(float4 x); + float8 __ovld __cnfn sinpi(float8 x); + float16 __ovld __cnfn sinpi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sinpi(double x); + double2 __ovld __cnfn sinpi(double2 x); + double3 __ovld 
__cnfn sinpi(double3 x); + double4 __ovld __cnfn sinpi(double4 x); + double8 __ovld __cnfn sinpi(double8 x); + double16 __ovld __cnfn sinpi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sinpi(half x); + half2 __ovld __cnfn sinpi(half2 x); +@@ -8584,14 +8597,14 @@ float3 __ovld __cnfn sqrt(float3); + float4 __ovld __cnfn sqrt(float4); + float8 __ovld __cnfn sqrt(float8); + float16 __ovld __cnfn sqrt(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sqrt(double); + double2 __ovld __cnfn sqrt(double2); + double3 __ovld __cnfn sqrt(double3); + double4 __ovld __cnfn sqrt(double4); + double8 __ovld __cnfn sqrt(double8); + double16 __ovld __cnfn sqrt(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sqrt(half); + half2 __ovld __cnfn sqrt(half2); +@@ -8610,14 +8623,14 @@ float3 __ovld __cnfn tan(float3); + float4 __ovld __cnfn tan(float4); + float8 __ovld __cnfn tan(float8); + float16 __ovld __cnfn tan(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn tan(double); + double2 __ovld __cnfn tan(double2); + double3 __ovld __cnfn tan(double3); + double4 __ovld __cnfn tan(double4); + double8 __ovld __cnfn tan(double8); + double16 __ovld __cnfn tan(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn tan(half); + half2 __ovld __cnfn tan(half2); +@@ -8636,14 +8649,14 @@ float3 __ovld __cnfn tanh(float3); + float4 __ovld __cnfn tanh(float4); + float8 __ovld __cnfn tanh(float8); + float16 __ovld __cnfn tanh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn tanh(double); + double2 __ovld __cnfn tanh(double2); + double3 __ovld __cnfn tanh(double3); + 
double4 __ovld __cnfn tanh(double4); + double8 __ovld __cnfn tanh(double8); + double16 __ovld __cnfn tanh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn tanh(half); + half2 __ovld __cnfn tanh(half2); +@@ -8662,14 +8675,14 @@ float3 __ovld __cnfn tanpi(float3 x); + float4 __ovld __cnfn tanpi(float4 x); + float8 __ovld __cnfn tanpi(float8 x); + float16 __ovld __cnfn tanpi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn tanpi(double x); + double2 __ovld __cnfn tanpi(double2 x); + double3 __ovld __cnfn tanpi(double3 x); + double4 __ovld __cnfn tanpi(double4 x); + double8 __ovld __cnfn tanpi(double8 x); + double16 __ovld __cnfn tanpi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn tanpi(half x); + half2 __ovld __cnfn tanpi(half2 x); +@@ -8688,14 +8701,14 @@ float3 __ovld __cnfn tgamma(float3); + float4 __ovld __cnfn tgamma(float4); + float8 __ovld __cnfn tgamma(float8); + float16 __ovld __cnfn tgamma(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn tgamma(double); + double2 __ovld __cnfn tgamma(double2); + double3 __ovld __cnfn tgamma(double3); + double4 __ovld __cnfn tgamma(double4); + double8 __ovld __cnfn tgamma(double8); + double16 __ovld __cnfn tgamma(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn tgamma(half); + half2 __ovld __cnfn tgamma(half2); +@@ -8715,14 +8728,14 @@ float3 __ovld __cnfn trunc(float3); + float4 __ovld __cnfn trunc(float4); + float8 __ovld __cnfn trunc(float8); + float16 __ovld __cnfn trunc(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn trunc(double); + double2 __ovld __cnfn trunc(double2); + 
double3 __ovld __cnfn trunc(double3); + double4 __ovld __cnfn trunc(double4); + double8 __ovld __cnfn trunc(double8); + double16 __ovld __cnfn trunc(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn trunc(half); + half2 __ovld __cnfn trunc(half2); +@@ -10108,7 +10121,7 @@ float3 __ovld __cnfn clamp(float3 x, float minval, float maxval); + float4 __ovld __cnfn clamp(float4 x, float minval, float maxval); + float8 __ovld __cnfn clamp(float8 x, float minval, float maxval); + float16 __ovld __cnfn clamp(float16 x, float minval, float maxval); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn clamp(double x, double minval, double maxval); + double2 __ovld __cnfn clamp(double2 x, double2 minval, double2 maxval); + double3 __ovld __cnfn clamp(double3 x, double3 minval, double3 maxval); +@@ -10120,7 +10133,7 @@ double3 __ovld __cnfn clamp(double3 x, double minval, double maxval); + double4 __ovld __cnfn clamp(double4 x, double minval, double maxval); + double8 __ovld __cnfn clamp(double8 x, double minval, double maxval); + double16 __ovld __cnfn clamp(double16 x, double minval, double maxval); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn clamp(half x, half minval, half maxval); + half2 __ovld __cnfn clamp(half2 x, half2 minval, half2 maxval); +@@ -10145,14 +10158,14 @@ float3 __ovld __cnfn degrees(float3 radians); + float4 __ovld __cnfn degrees(float4 radians); + float8 __ovld __cnfn degrees(float8 radians); + float16 __ovld __cnfn degrees(float16 radians); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn degrees(double radians); + double2 __ovld __cnfn degrees(double2 radians); + double3 __ovld __cnfn degrees(double3 radians); + double4 __ovld __cnfn degrees(double4 radians); + double8 __ovld __cnfn 
degrees(double8 radians); + double16 __ovld __cnfn degrees(double16 radians); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn degrees(half radians); + half2 __ovld __cnfn degrees(half2 radians); +@@ -10177,7 +10190,7 @@ float3 __ovld __cnfn max(float3 x, float y); + float4 __ovld __cnfn max(float4 x, float y); + float8 __ovld __cnfn max(float8 x, float y); + float16 __ovld __cnfn max(float16 x, float y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn max(double x, double y); + double2 __ovld __cnfn max(double2 x, double2 y); + double3 __ovld __cnfn max(double3 x, double3 y); +@@ -10189,7 +10202,7 @@ double3 __ovld __cnfn max(double3 x, double y); + double4 __ovld __cnfn max(double4 x, double y); + double8 __ovld __cnfn max(double8 x, double y); + double16 __ovld __cnfn max(double16 x, double y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn max(half x, half y); + half2 __ovld __cnfn max(half2 x, half2 y); +@@ -10219,7 +10232,7 @@ float3 __ovld __cnfn min(float3 x, float y); + float4 __ovld __cnfn min(float4 x, float y); + float8 __ovld __cnfn min(float8 x, float y); + float16 __ovld __cnfn min(float16 x, float y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn min(double x, double y); + double2 __ovld __cnfn min(double2 x, double2 y); + double3 __ovld __cnfn min(double3 x, double3 y); +@@ -10231,7 +10244,7 @@ double3 __ovld __cnfn min(double3 x, double y); + double4 __ovld __cnfn min(double4 x, double y); + double8 __ovld __cnfn min(double8 x, double y); + double16 __ovld __cnfn min(double16 x, double y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn min(half x, half y); + half2 __ovld __cnfn min(half2 x, half2 y); +@@ -10264,7 
+10277,7 @@ float3 __ovld __cnfn mix(float3 x, float3 y, float a); + float4 __ovld __cnfn mix(float4 x, float4 y, float a); + float8 __ovld __cnfn mix(float8 x, float8 y, float a); + float16 __ovld __cnfn mix(float16 x, float16 y, float a); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn mix(double x, double y, double a); + double2 __ovld __cnfn mix(double2 x, double2 y, double2 a); + double3 __ovld __cnfn mix(double3 x, double3 y, double3 a); +@@ -10276,7 +10289,7 @@ double3 __ovld __cnfn mix(double3 x, double3 y, double a); + double4 __ovld __cnfn mix(double4 x, double4 y, double a); + double8 __ovld __cnfn mix(double8 x, double8 y, double a); + double16 __ovld __cnfn mix(double16 x, double16 y, double a); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn mix(half x, half y, half a); + half2 __ovld __cnfn mix(half2 x, half2 y, half2 a); +@@ -10301,14 +10314,14 @@ float3 __ovld __cnfn radians(float3 degrees); + float4 __ovld __cnfn radians(float4 degrees); + float8 __ovld __cnfn radians(float8 degrees); + float16 __ovld __cnfn radians(float16 degrees); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn radians(double degrees); + double2 __ovld __cnfn radians(double2 degrees); + double3 __ovld __cnfn radians(double3 degrees); + double4 __ovld __cnfn radians(double4 degrees); + double8 __ovld __cnfn radians(double8 degrees); + double16 __ovld __cnfn radians(double16 degrees); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn radians(half degrees); + half2 __ovld __cnfn radians(half2 degrees); +@@ -10332,7 +10345,7 @@ float3 __ovld __cnfn step(float edge, float3 x); + float4 __ovld __cnfn step(float edge, float4 x); + float8 __ovld __cnfn step(float edge, float8 x); + float16 __ovld __cnfn step(float edge, float16 x); 
+-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn step(double edge, double x); + double2 __ovld __cnfn step(double2 edge, double2 x); + double3 __ovld __cnfn step(double3 edge, double3 x); +@@ -10344,7 +10357,7 @@ double3 __ovld __cnfn step(double edge, double3 x); + double4 __ovld __cnfn step(double edge, double4 x); + double8 __ovld __cnfn step(double edge, double8 x); + double16 __ovld __cnfn step(double edge, double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn step(half edge, half x); + half2 __ovld __cnfn step(half2 edge, half2 x); +@@ -10383,7 +10396,7 @@ float3 __ovld __cnfn smoothstep(float edge0, float edge1, float3 x); + float4 __ovld __cnfn smoothstep(float edge0, float edge1, float4 x); + float8 __ovld __cnfn smoothstep(float edge0, float edge1, float8 x); + float16 __ovld __cnfn smoothstep(float edge0, float edge1, float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn smoothstep(double edge0, double edge1, double x); + double2 __ovld __cnfn smoothstep(double2 edge0, double2 edge1, double2 x); + double3 __ovld __cnfn smoothstep(double3 edge0, double3 edge1, double3 x); +@@ -10395,7 +10408,7 @@ double3 __ovld __cnfn smoothstep(double edge0, double edge1, double3 x); + double4 __ovld __cnfn smoothstep(double edge0, double edge1, double4 x); + double8 __ovld __cnfn smoothstep(double edge0, double edge1, double8 x); + double16 __ovld __cnfn smoothstep(double edge0, double edge1, double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn smoothstep(half edge0, half edge1, half x); + half2 __ovld __cnfn smoothstep(half2 edge0, half2 edge1, half2 x); +@@ -10420,14 +10433,14 @@ float3 __ovld __cnfn sign(float3 x); + float4 __ovld __cnfn sign(float4 x); + float8 __ovld __cnfn sign(float8 x); + 
float16 __ovld __cnfn sign(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sign(double x); + double2 __ovld __cnfn sign(double2 x); + double3 __ovld __cnfn sign(double3 x); + double4 __ovld __cnfn sign(double4 x); + double8 __ovld __cnfn sign(double8 x); + double16 __ovld __cnfn sign(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sign(half x); + half2 __ovld __cnfn sign(half2 x); +@@ -10445,10 +10458,10 @@ half16 __ovld __cnfn sign(half16 x); + */ + float4 __ovld __cnfn cross(float4 p0, float4 p1); + float3 __ovld __cnfn cross(float3 p0, float3 p1); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double4 __ovld __cnfn cross(double4 p0, double4 p1); + double3 __ovld __cnfn cross(double3 p0, double3 p1); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half4 __ovld __cnfn cross(half4 p0, half4 p1); + half3 __ovld __cnfn cross(half3 p0, half3 p1); +@@ -10461,12 +10474,12 @@ float __ovld __cnfn dot(float p0, float p1); + float __ovld __cnfn dot(float2 p0, float2 p1); + float __ovld __cnfn dot(float3 p0, float3 p1); + float __ovld __cnfn dot(float4 p0, float4 p1); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn dot(double p0, double p1); + double __ovld __cnfn dot(double2 p0, double2 p1); + double __ovld __cnfn dot(double3 p0, double3 p1); + double __ovld __cnfn dot(double4 p0, double4 p1); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn dot(half p0, half p1); + half __ovld __cnfn dot(half2 p0, half2 p1); +@@ -10482,12 +10495,12 @@ float __ovld __cnfn distance(float p0, float p1); + float __ovld __cnfn distance(float2 p0, float2 p1); + float __ovld __cnfn distance(float3 p0, float3 p1); + float __ovld 
__cnfn distance(float4 p0, float4 p1); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn distance(double p0, double p1); + double __ovld __cnfn distance(double2 p0, double2 p1); + double __ovld __cnfn distance(double3 p0, double3 p1); + double __ovld __cnfn distance(double4 p0, double4 p1); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn distance(half p0, half p1); + half __ovld __cnfn distance(half2 p0, half2 p1); +@@ -10503,12 +10516,12 @@ float __ovld __cnfn length(float p); + float __ovld __cnfn length(float2 p); + float __ovld __cnfn length(float3 p); + float __ovld __cnfn length(float4 p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn length(double p); + double __ovld __cnfn length(double2 p); + double __ovld __cnfn length(double3 p); + double __ovld __cnfn length(double4 p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn length(half p); + half __ovld __cnfn length(half2 p); +@@ -10524,12 +10537,12 @@ float __ovld __cnfn normalize(float p); + float2 __ovld __cnfn normalize(float2 p); + float3 __ovld __cnfn normalize(float3 p); + float4 __ovld __cnfn normalize(float4 p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn normalize(double p); + double2 __ovld __cnfn normalize(double2 p); + double3 __ovld __cnfn normalize(double3 p); + double4 __ovld __cnfn normalize(double4 p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn normalize(half p); + half2 __ovld __cnfn normalize(half2 p); +@@ -10610,14 +10623,14 @@ int3 __ovld __cnfn isequal(float3 x, float3 y); + int4 __ovld __cnfn isequal(float4 x, float4 y); + int8 __ovld __cnfn isequal(float8 x, float8 y); + int16 __ovld __cnfn 
isequal(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isequal(double x, double y); + long2 __ovld __cnfn isequal(double2 x, double2 y); + long3 __ovld __cnfn isequal(double3 x, double3 y); + long4 __ovld __cnfn isequal(double4 x, double4 y); + long8 __ovld __cnfn isequal(double8 x, double8 y); + long16 __ovld __cnfn isequal(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isequal(half x, half y); + short2 __ovld __cnfn isequal(half2 x, half2 y); +@@ -10636,14 +10649,14 @@ int3 __ovld __cnfn isnotequal(float3 x, float3 y); + int4 __ovld __cnfn isnotequal(float4 x, float4 y); + int8 __ovld __cnfn isnotequal(float8 x, float8 y); + int16 __ovld __cnfn isnotequal(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isnotequal(double x, double y); + long2 __ovld __cnfn isnotequal(double2 x, double2 y); + long3 __ovld __cnfn isnotequal(double3 x, double3 y); + long4 __ovld __cnfn isnotequal(double4 x, double4 y); + long8 __ovld __cnfn isnotequal(double8 x, double8 y); + long16 __ovld __cnfn isnotequal(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isnotequal(half x, half y); + short2 __ovld __cnfn isnotequal(half2 x, half2 y); +@@ -10662,14 +10675,14 @@ int3 __ovld __cnfn isgreater(float3 x, float3 y); + int4 __ovld __cnfn isgreater(float4 x, float4 y); + int8 __ovld __cnfn isgreater(float8 x, float8 y); + int16 __ovld __cnfn isgreater(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isgreater(double x, double y); + long2 __ovld __cnfn isgreater(double2 x, double2 y); + long3 __ovld __cnfn isgreater(double3 x, double3 y); + long4 __ovld __cnfn isgreater(double4 x, 
double4 y); + long8 __ovld __cnfn isgreater(double8 x, double8 y); + long16 __ovld __cnfn isgreater(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isgreater(half x, half y); + short2 __ovld __cnfn isgreater(half2 x, half2 y); +@@ -10688,14 +10701,14 @@ int3 __ovld __cnfn isgreaterequal(float3 x, float3 y); + int4 __ovld __cnfn isgreaterequal(float4 x, float4 y); + int8 __ovld __cnfn isgreaterequal(float8 x, float8 y); + int16 __ovld __cnfn isgreaterequal(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isgreaterequal(double x, double y); + long2 __ovld __cnfn isgreaterequal(double2 x, double2 y); + long3 __ovld __cnfn isgreaterequal(double3 x, double3 y); + long4 __ovld __cnfn isgreaterequal(double4 x, double4 y); + long8 __ovld __cnfn isgreaterequal(double8 x, double8 y); + long16 __ovld __cnfn isgreaterequal(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isgreaterequal(half x, half y); + short2 __ovld __cnfn isgreaterequal(half2 x, half2 y); +@@ -10714,14 +10727,14 @@ int3 __ovld __cnfn isless(float3 x, float3 y); + int4 __ovld __cnfn isless(float4 x, float4 y); + int8 __ovld __cnfn isless(float8 x, float8 y); + int16 __ovld __cnfn isless(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isless(double x, double y); + long2 __ovld __cnfn isless(double2 x, double2 y); + long3 __ovld __cnfn isless(double3 x, double3 y); + long4 __ovld __cnfn isless(double4 x, double4 y); + long8 __ovld __cnfn isless(double8 x, double8 y); + long16 __ovld __cnfn isless(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isless(half x, half y); + 
short2 __ovld __cnfn isless(half2 x, half2 y); +@@ -10740,14 +10753,14 @@ int3 __ovld __cnfn islessequal(float3 x, float3 y); + int4 __ovld __cnfn islessequal(float4 x, float4 y); + int8 __ovld __cnfn islessequal(float8 x, float8 y); + int16 __ovld __cnfn islessequal(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn islessequal(double x, double y); + long2 __ovld __cnfn islessequal(double2 x, double2 y); + long3 __ovld __cnfn islessequal(double3 x, double3 y); + long4 __ovld __cnfn islessequal(double4 x, double4 y); + long8 __ovld __cnfn islessequal(double8 x, double8 y); + long16 __ovld __cnfn islessequal(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn islessequal(half x, half y); + short2 __ovld __cnfn islessequal(half2 x, half2 y); +@@ -10767,14 +10780,14 @@ int3 __ovld __cnfn islessgreater(float3 x, float3 y); + int4 __ovld __cnfn islessgreater(float4 x, float4 y); + int8 __ovld __cnfn islessgreater(float8 x, float8 y); + int16 __ovld __cnfn islessgreater(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn islessgreater(double x, double y); + long2 __ovld __cnfn islessgreater(double2 x, double2 y); + long3 __ovld __cnfn islessgreater(double3 x, double3 y); + long4 __ovld __cnfn islessgreater(double4 x, double4 y); + long8 __ovld __cnfn islessgreater(double8 x, double8 y); + long16 __ovld __cnfn islessgreater(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn islessgreater(half x, half y); + short2 __ovld __cnfn islessgreater(half2 x, half2 y); +@@ -10793,14 +10806,14 @@ int3 __ovld __cnfn isfinite(float3); + int4 __ovld __cnfn isfinite(float4); + int8 __ovld __cnfn isfinite(float8); + int16 __ovld __cnfn isfinite(float16); 
+-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isfinite(double); + long2 __ovld __cnfn isfinite(double2); + long3 __ovld __cnfn isfinite(double3); + long4 __ovld __cnfn isfinite(double4); + long8 __ovld __cnfn isfinite(double8); + long16 __ovld __cnfn isfinite(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isfinite(half); + short2 __ovld __cnfn isfinite(half2); +@@ -10819,14 +10832,14 @@ int3 __ovld __cnfn isinf(float3); + int4 __ovld __cnfn isinf(float4); + int8 __ovld __cnfn isinf(float8); + int16 __ovld __cnfn isinf(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isinf(double); + long2 __ovld __cnfn isinf(double2); + long3 __ovld __cnfn isinf(double3); + long4 __ovld __cnfn isinf(double4); + long8 __ovld __cnfn isinf(double8); + long16 __ovld __cnfn isinf(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isinf(half); + short2 __ovld __cnfn isinf(half2); +@@ -10845,14 +10858,14 @@ int3 __ovld __cnfn isnan(float3); + int4 __ovld __cnfn isnan(float4); + int8 __ovld __cnfn isnan(float8); + int16 __ovld __cnfn isnan(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isnan(double); + long2 __ovld __cnfn isnan(double2); + long3 __ovld __cnfn isnan(double3); + long4 __ovld __cnfn isnan(double4); + long8 __ovld __cnfn isnan(double8); + long16 __ovld __cnfn isnan(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isnan(half); + short2 __ovld __cnfn isnan(half2); +@@ -10871,14 +10884,14 @@ int3 __ovld __cnfn isnormal(float3); + int4 __ovld __cnfn isnormal(float4); + int8 __ovld __cnfn isnormal(float8); + int16 __ovld __cnfn isnormal(float16); +-#ifdef 
cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isnormal(double); + long2 __ovld __cnfn isnormal(double2); + long3 __ovld __cnfn isnormal(double3); + long4 __ovld __cnfn isnormal(double4); + long8 __ovld __cnfn isnormal(double8); + long16 __ovld __cnfn isnormal(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isnormal(half); + short2 __ovld __cnfn isnormal(half2); +@@ -10899,14 +10912,14 @@ int3 __ovld __cnfn isordered(float3 x, float3 y); + int4 __ovld __cnfn isordered(float4 x, float4 y); + int8 __ovld __cnfn isordered(float8 x, float8 y); + int16 __ovld __cnfn isordered(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isordered(double x, double y); + long2 __ovld __cnfn isordered(double2 x, double2 y); + long3 __ovld __cnfn isordered(double3 x, double3 y); + long4 __ovld __cnfn isordered(double4 x, double4 y); + long8 __ovld __cnfn isordered(double8 x, double8 y); + long16 __ovld __cnfn isordered(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isordered(half x, half y); + short2 __ovld __cnfn isordered(half2 x, half2 y); +@@ -10927,14 +10940,14 @@ int3 __ovld __cnfn isunordered(float3 x, float3 y); + int4 __ovld __cnfn isunordered(float4 x, float4 y); + int8 __ovld __cnfn isunordered(float8 x, float8 y); + int16 __ovld __cnfn isunordered(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isunordered(double x, double y); + long2 __ovld __cnfn isunordered(double2 x, double2 y); + long3 __ovld __cnfn isunordered(double3 x, double3 y); + long4 __ovld __cnfn isunordered(double4 x, double4 y); + long8 __ovld __cnfn isunordered(double8 x, double8 y); + long16 __ovld __cnfn isunordered(double16 x, double16 y); 
+-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isunordered(half x, half y); + short2 __ovld __cnfn isunordered(half2 x, half2 y); +@@ -10957,14 +10970,14 @@ int3 __ovld __cnfn signbit(float3); + int4 __ovld __cnfn signbit(float4); + int8 __ovld __cnfn signbit(float8); + int16 __ovld __cnfn signbit(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn signbit(double); + long2 __ovld __cnfn signbit(double2); + long3 __ovld __cnfn signbit(double3); + long4 __ovld __cnfn signbit(double4); + long8 __ovld __cnfn signbit(double8); + long16 __ovld __cnfn signbit(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn signbit(half); + short2 __ovld __cnfn signbit(half2); +@@ -11091,14 +11104,14 @@ float3 __ovld __cnfn bitselect(float3 a, float3 b, float3 c); + float4 __ovld __cnfn bitselect(float4 a, float4 b, float4 c); + float8 __ovld __cnfn bitselect(float8 a, float8 b, float8 c); + float16 __ovld __cnfn bitselect(float16 a, float16 b, float16 c); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn bitselect(double a, double b, double c); + double2 __ovld __cnfn bitselect(double2 a, double2 b, double2 c); + double3 __ovld __cnfn bitselect(double3 a, double3 b, double3 c); + double4 __ovld __cnfn bitselect(double4 a, double4 b, double4 c); + double8 __ovld __cnfn bitselect(double8 a, double8 b, double8 c); + double16 __ovld __cnfn bitselect(double16 a, double16 b, double16 c); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn bitselect(half a, half b, half c); + half2 __ovld __cnfn bitselect(half2 a, half2 b, half2 c); +@@ -11231,7 +11244,7 @@ ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c); + long16 __ovld __cnfn select(long16 a, 
long16 b, ulong16 c); + ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn select(double a, double b, long c); + double2 __ovld __cnfn select(double2 a, double2 b, long2 c); + double3 __ovld __cnfn select(double3 a, double3 b, long3 c); +@@ -11244,7 +11257,7 @@ double3 __ovld __cnfn select(double3 a, double3 b, ulong3 c); + double4 __ovld __cnfn select(double4 a, double4 b, ulong4 c); + double8 __ovld __cnfn select(double8 a, double8 b, ulong8 c); + double16 __ovld __cnfn select(double16 a, double16 b, ulong16 c); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn select(half a, half b, short c); + half2 __ovld __cnfn select(half2 a, half2 b, short2 c); +@@ -11323,13 +11336,13 @@ uint16 __ovld vload16(size_t offset, const __constant uint *p); + long16 __ovld vload16(size_t offset, const __constant long *p); + ulong16 __ovld vload16(size_t offset, const __constant ulong *p); + float16 __ovld vload16(size_t offset, const __constant float *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld vload2(size_t offset, const __constant double *p); + double3 __ovld vload3(size_t offset, const __constant double *p); + double4 __ovld vload4(size_t offset, const __constant double *p); + double8 __ovld vload8(size_t offset, const __constant double *p); + double16 __ovld vload16(size_t offset, const __constant double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half __ovld vload(size_t offset, const __constant half *p); +@@ -11340,7 +11353,7 @@ half8 __ovld vload8(size_t offset, const __constant half *p); + half16 __ovld vload16(size_t offset, const __constant half *p); + #endif //cl_khr_fp16 + +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef 
__opencl_c_generic_address_space + char2 __ovld vload2(size_t offset, const char *p); + uchar2 __ovld vload2(size_t offset, const uchar *p); + short2 __ovld vload2(size_t offset, const short *p); +@@ -11387,13 +11400,13 @@ long16 __ovld vload16(size_t offset, const long *p); + ulong16 __ovld vload16(size_t offset, const ulong *p); + float16 __ovld vload16(size_t offset, const float *p); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld vload2(size_t offset, const double *p); + double3 __ovld vload3(size_t offset, const double *p); + double4 __ovld vload4(size_t offset, const double *p); + double8 __ovld vload8(size_t offset, const double *p); + double16 __ovld vload16(size_t offset, const double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half __ovld vload(size_t offset, const half *p); +@@ -11403,7 +11416,7 @@ half4 __ovld vload4(size_t offset, const half *p); + half8 __ovld vload8(size_t offset, const half *p); + half16 __ovld vload16(size_t offset, const half *p); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space + char2 __ovld vload2(size_t offset, const __global char *p); + uchar2 __ovld vload2(size_t offset, const __global uchar *p); + short2 __ovld vload2(size_t offset, const __global short *p); +@@ -11540,7 +11553,7 @@ long16 __ovld vload16(size_t offset, const __private long *p); + ulong16 __ovld vload16(size_t offset, const __private ulong *p); + float16 __ovld vload16(size_t offset, const __private float *p); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld vload2(size_t offset, const __global double *p); + double3 __ovld vload3(size_t offset, const __global double *p); + double4 __ovld vload4(size_t offset, const __global double *p); +@@ -11556,7 +11569,7 @@ double3 __ovld vload3(size_t offset, const __private double *p); + double4 __ovld vload4(size_t offset, const 
__private double *p); + double8 __ovld vload8(size_t offset, const __private double *p); + double16 __ovld vload16(size_t offset, const __private double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half __ovld vload(size_t offset, const __global half *p); +@@ -11578,9 +11591,8 @@ half4 __ovld vload4(size_t offset, const __private half *p); + half8 __ovld vload8(size_t offset, const __private half *p); + half16 __ovld vload16(size_t offset, const __private half *p); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + void __ovld vstore2(char2 data, size_t offset, char *p); + void __ovld vstore2(uchar2 data, size_t offset, uchar *p); + void __ovld vstore2(short2 data, size_t offset, short *p); +@@ -11626,13 +11638,13 @@ void __ovld vstore16(uint16 data, size_t offset, uint *p); + void __ovld vstore16(long16 data, size_t offset, long *p); + void __ovld vstore16(ulong16 data, size_t offset, ulong *p); + void __ovld vstore16(float16 data, size_t offset, float *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore2(double2 data, size_t offset, double *p); + void __ovld vstore3(double3 data, size_t offset, double *p); + void __ovld vstore4(double4 data, size_t offset, double *p); + void __ovld vstore8(double8 data, size_t offset, double *p); + void __ovld vstore16(double16 data, size_t offset, double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + void __ovld vstore(half data, size_t offset, half *p); + void __ovld vstore2(half2 data, size_t offset, half *p); +@@ -11641,7 +11653,7 @@ void __ovld vstore4(half4 data, size_t offset, half *p); + void __ovld vstore8(half8 data, size_t offset, half *p); + void __ovld 
vstore16(half16 data, size_t offset, half *p); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space + void __ovld vstore2(char2 data, size_t offset, __global char *p); + void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p); + void __ovld vstore2(short2 data, size_t offset, __global short *p); +@@ -11777,7 +11789,7 @@ void __ovld vstore16(uint16 data, size_t offset, __private uint *p); + void __ovld vstore16(long16 data, size_t offset, __private long *p); + void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p); + void __ovld vstore16(float16 data, size_t offset, __private float *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore2(double2 data, size_t offset, __global double *p); + void __ovld vstore3(double3 data, size_t offset, __global double *p); + void __ovld vstore4(double4 data, size_t offset, __global double *p); +@@ -11793,7 +11805,7 @@ void __ovld vstore3(double3 data, size_t offset, __private double *p); + void __ovld vstore4(double4 data, size_t offset, __private double *p); + void __ovld vstore8(double8 data, size_t offset, __private double *p); + void __ovld vstore16(double16 data, size_t offset, __private double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + void __ovld vstore(half data, size_t offset, __global half *p); + void __ovld vstore2(half2 data, size_t offset, __global half *p); +@@ -11814,7 +11826,6 @@ void __ovld vstore4(half4 data, size_t offset, __private half *p); + void __ovld vstore8(half8 data, size_t offset, __private half *p); + void __ovld vstore16(half16 data, size_t offset, __private half *p); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * Read sizeof (half) bytes of data from address +@@ -11825,13 +11836,12 @@ void __ovld vstore16(half16 data, size_t offset, __private half *p); + * must be 
16-bit aligned. + */ + float __ovld vload_half(size_t offset, const __constant half *p); +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld vload_half(size_t offset, const half *p); +-#else ++#endif //__opencl_c_generic_address_space + float __ovld vload_half(size_t offset, const __global half *p); + float __ovld vload_half(size_t offset, const __local half *p); + float __ovld vload_half(size_t offset, const __private half *p); +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * Read sizeof (halfn) bytes of data from address +@@ -11846,13 +11856,13 @@ float3 __ovld vload_half3(size_t offset, const __constant half *p); + float4 __ovld vload_half4(size_t offset, const __constant half *p); + float8 __ovld vload_half8(size_t offset, const __constant half *p); + float16 __ovld vload_half16(size_t offset, const __constant half *p); +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float2 __ovld vload_half2(size_t offset, const half *p); + float3 __ovld vload_half3(size_t offset, const half *p); + float4 __ovld vload_half4(size_t offset, const half *p); + float8 __ovld vload_half8(size_t offset, const half *p); + float16 __ovld vload_half16(size_t offset, const half *p); +-#else ++#endif //__opencl_c_generic_address_space + float2 __ovld vload_half2(size_t offset, const __global half *p); + float3 __ovld vload_half3(size_t offset, const __global half *p); + float4 __ovld vload_half4(size_t offset, const __global half *p); +@@ -11868,7 +11878,6 @@ float3 __ovld vload_half3(size_t offset, const __private half *p); + float4 __ovld vload_half4(size_t offset, const __private half *p); + float8 __ovld vload_half8(size_t offset, const __private half *p); + float16 __ovld vload_half16(size_t offset, const __private half *p); +-#endif //defined(__OPENCL_CPP_VERSION__) || 
(__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * The float value given by data is first +@@ -11881,20 +11890,20 @@ float16 __ovld vload_half16(size_t offset, const __private half *p); + * The default current rounding mode is round to + * nearest even. + */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + void __ovld vstore_half(float data, size_t offset, half *p); + void __ovld vstore_half_rte(float data, size_t offset, half *p); + void __ovld vstore_half_rtz(float data, size_t offset, half *p); + void __ovld vstore_half_rtp(float data, size_t offset, half *p); + void __ovld vstore_half_rtn(float data, size_t offset, half *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore_half(double data, size_t offset, half *p); + void __ovld vstore_half_rte(double data, size_t offset, half *p); + void __ovld vstore_half_rtz(double data, size_t offset, half *p); + void __ovld vstore_half_rtp(double data, size_t offset, half *p); + void __ovld vstore_half_rtn(double data, size_t offset, half *p); +-#endif //cl_khr_fp64 +-#else ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif //__opencl_c_generic_address_space + void __ovld vstore_half(float data, size_t offset, __global half *p); + void __ovld vstore_half_rte(float data, size_t offset, __global half *p); + void __ovld vstore_half_rtz(float data, size_t offset, __global half *p); +@@ -11910,7 +11919,7 @@ void __ovld vstore_half_rte(float data, size_t offset, __private half *p); + void __ovld vstore_half_rtz(float data, size_t offset, __private half *p); + void __ovld vstore_half_rtp(float data, size_t offset, __private half *p); + void __ovld vstore_half_rtn(float data, size_t offset, __private half *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore_half(double data, size_t offset, __global half *p); + void __ovld 
vstore_half_rte(double data, size_t offset, __global half *p); + void __ovld vstore_half_rtz(double data, size_t offset, __global half *p); +@@ -11926,8 +11935,7 @@ void __ovld vstore_half_rte(double data, size_t offset, __private half *p); + void __ovld vstore_half_rtz(double data, size_t offset, __private half *p); + void __ovld vstore_half_rtp(double data, size_t offset, __private half *p); + void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); +-#endif //cl_khr_fp64 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + /** + * The floatn value given by data is converted to +@@ -11940,7 +11948,7 @@ void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); + * The default current rounding mode is round to + * nearest even. + */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + void __ovld vstore_half2(float2 data, size_t offset, half *p); + void __ovld vstore_half3(float3 data, size_t offset, half *p); + void __ovld vstore_half4(float4 data, size_t offset, half *p); +@@ -11966,7 +11974,7 @@ void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p); + void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p); + void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p); + void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore_half2(double2 data, size_t offset, half *p); + void __ovld vstore_half3(double3 data, size_t offset, half *p); + void __ovld vstore_half4(double4 data, size_t offset, half *p); +@@ -11992,8 +12000,8 @@ void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p); + void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p); + void __ovld vstore_half8_rtn(double8 data, size_t offset, 
half *p); + void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p); +-#endif //cl_khr_fp64 +-#else ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif //__opencl_c_generic_address_space + void __ovld vstore_half2(float2 data, size_t offset, __global half *p); + void __ovld vstore_half3(float3 data, size_t offset, __global half *p); + void __ovld vstore_half4(float4 data, size_t offset, __global half *p); +@@ -12069,7 +12077,7 @@ void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p); + void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p); + void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p); + void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore_half2(double2 data, size_t offset, __global half *p); + void __ovld vstore_half3(double3 data, size_t offset, __global half *p); + void __ovld vstore_half4(double4 data, size_t offset, __global half *p); +@@ -12145,8 +12153,7 @@ void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p); + void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p); + void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p); + void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p); +-#endif //cl_khr_fp64 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + /** + * For n = 1, 2, 4, 8 and 16 read sizeof (halfn) +@@ -12167,14 +12174,14 @@ float3 __ovld vloada_half3(size_t offset, const __constant half *p); + float4 __ovld vloada_half4(size_t offset, const __constant half *p); + float8 __ovld vloada_half8(size_t offset, const __constant half *p); + float16 __ovld vloada_half16(size_t offset, const __constant half *p); +-#if 
defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld vloada_half(size_t offset, const half *p); + float2 __ovld vloada_half2(size_t offset, const half *p); + float3 __ovld vloada_half3(size_t offset, const half *p); + float4 __ovld vloada_half4(size_t offset, const half *p); + float8 __ovld vloada_half8(size_t offset, const half *p); + float16 __ovld vloada_half16(size_t offset, const half *p); +-#else ++#endif //__opencl_c_generic_address_space + float __ovld vloada_half(size_t offset, const __global half *p); + float2 __ovld vloada_half2(size_t offset, const __global half *p); + float3 __ovld vloada_half3(size_t offset, const __global half *p); +@@ -12193,7 +12200,6 @@ float3 __ovld vloada_half3(size_t offset, const __private half *p); + float4 __ovld vloada_half4(size_t offset, const __private half *p); + float8 __ovld vloada_half8(size_t offset, const __private half *p); + float16 __ovld vloada_half16(size_t offset, const __private half *p); +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * The floatn value given by data is converted to +@@ -12211,7 +12217,7 @@ float16 __ovld vloada_half16(size_t offset, const __private half *p); + * mode. The default current rounding mode is + * round to nearest even. 
+ */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + void __ovld vstorea_half(float data, size_t offset, half *p); + void __ovld vstorea_half2(float2 data, size_t offset, half *p); + void __ovld vstorea_half3(float3 data, size_t offset, half *p); +@@ -12247,7 +12253,7 @@ void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p); + void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p); + void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstorea_half(double data, size_t offset, half *p); + void __ovld vstorea_half2(double2 data, size_t offset, half *p); + void __ovld vstorea_half3(double3 data, size_t offset, half *p); +@@ -12282,9 +12288,9 @@ void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p); + void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p); + void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p); + void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif //__opencl_c_generic_address_space + +-#else + void __ovld vstorea_half(float data, size_t offset, __global half *p); + void __ovld vstorea_half2(float2 data, size_t offset, __global half *p); + void __ovld vstorea_half3(float3 data, size_t offset, __global half *p); +@@ -12390,7 +12396,7 @@ void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p); + void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p); + void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstorea_half(double data, size_t offset, __global half *p); + void __ovld vstorea_half2(double2 data, size_t offset, __global half *p); 
+ void __ovld vstorea_half3(double3 data, size_t offset, __global half *p); +@@ -12495,8 +12501,7 @@ void __ovld vstorea_half3_rtn(double3 data,size_t offset, __private half *p); + void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p); + void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p); + void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p); +-#endif //cl_khr_fp64 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions + +@@ -12580,7 +12585,7 @@ void __ovld write_mem_fence(cl_mem_fence_flags flags); + + // OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions + +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + cl_mem_fence_flags __ovld get_fence(const void *ptr); + cl_mem_fence_flags __ovld get_fence(void *ptr); + +@@ -12591,7 +12596,7 @@ cl_mem_fence_flags __ovld get_fence(void *ptr); + * where gentype is builtin type or user defined type. 
+ */ + +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif //__opencl_c_generic_address_space + + // OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch + +@@ -12730,7 +12735,7 @@ event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16 + event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event); +@@ -12743,7 +12748,7 @@ event_t __ovld async_work_group_copy(__global double3 *dst, const __local double + event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__local half2 *dst, const 
__global half2 *src, size_t num_elements, event_t event); +@@ -12893,7 +12898,7 @@ event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local + event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, size_t dst_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event); +@@ -12906,7 +12911,7 @@ event_t __ovld async_work_group_strided_copy(__global double3 *dst, const __loca + event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event); + event_t __ovld 
async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event); +@@ -12996,14 +13001,14 @@ void __ovld prefetch(const __global uint16 *p, size_t num_elements); + void __ovld prefetch(const __global long16 *p, size_t num_elements); + void __ovld prefetch(const __global ulong16 *p, size_t num_elements); + void __ovld prefetch(const __global float16 *p, size_t num_elements); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld prefetch(const __global double *p, size_t num_elements); + void __ovld prefetch(const __global double2 *p, size_t num_elements); + void __ovld prefetch(const __global double3 *p, size_t num_elements); + void __ovld prefetch(const __global double4 *p, size_t num_elements); + void __ovld prefetch(const __global double8 *p, size_t num_elements); + void __ovld prefetch(const __global double16 *p, size_t num_elements); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + void __ovld prefetch(const __global half *p, size_t num_elements); + void __ovld prefetch(const __global half2 *p, size_t num_elements); +@@ -13026,9 +13031,11 @@ void __ovld prefetch(const __global half16 *p, size_t num_elements); + * pointed by p. The function returns old. 
+ */ + int __ovld atomic_add(volatile __global int *p, int val); +-unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_add(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_add(volatile __local int *p, int val); +-unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_add(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_add(volatile int *p, int val); + unsigned int __ovld atomic_add(volatile unsigned int *p, unsigned int val); +@@ -13056,9 +13063,11 @@ unsigned long __ovld atom_add(volatile __local unsigned long *p, unsigned long v + * returns old. + */ + int __ovld atomic_sub(volatile __global int *p, int val); +-unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_sub(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_sub(volatile __local int *p, int val); +-unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_sub(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_sub(volatile int *p, int val); + unsigned int __ovld atomic_sub(volatile unsigned int *p, unsigned int val); +@@ -13086,9 +13095,11 @@ unsigned long __ovld atom_sub(volatile __local unsigned long *p, unsigned long v + * value. 
+ */ + int __ovld atomic_xchg(volatile __global int *p, int val); +-unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_xchg(volatile __local int *p, int val); +-unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, ++ unsigned int val); + float __ovld atomic_xchg(volatile __global float *p, float val); + float __ovld atomic_xchg(volatile __local float *p, float val); + #ifdef __OPENCL_CPP_VERSION__ +@@ -13183,12 +13194,15 @@ unsigned long __ovld atom_dec(volatile __local unsigned long *p); + * returns old. + */ + int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val); +-unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val); ++unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, ++ unsigned int cmp, unsigned int val); + int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val); +-unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val); ++unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, ++ unsigned int cmp, unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_cmpxchg(volatile int *p, int cmp, int val); +-unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp, unsigned int val); ++unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp, ++ unsigned int val); + #endif + + #if defined(cl_khr_global_int32_base_atomics) +@@ -13215,9 +13229,11 @@ unsigned long __ovld atom_cmpxchg(volatile __local unsigned long *p, unsigned lo + * returns old. 
+ */ + int __ovld atomic_min(volatile __global int *p, int val); +-unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_min(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_min(volatile __local int *p, int val); +-unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_min(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_min(volatile int *p, int val); + unsigned int __ovld atomic_min(volatile unsigned int *p, unsigned int val); +@@ -13247,9 +13263,11 @@ unsigned long __ovld atom_min(volatile __local unsigned long *p, unsigned long v + * returns old. + */ + int __ovld atomic_max(volatile __global int *p, int val); +-unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_max(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_max(volatile __local int *p, int val); +-unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_max(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_max(volatile int *p, int val); + unsigned int __ovld atomic_max(volatile unsigned int *p, unsigned int val); +@@ -13278,9 +13296,11 @@ unsigned long __ovld atom_max(volatile __local unsigned long *p, unsigned long v + * pointed by p. The function returns old. 
+ */ + int __ovld atomic_and(volatile __global int *p, int val); +-unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_and(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_and(volatile __local int *p, int val); +-unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_and(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_and(volatile int *p, int val); + unsigned int __ovld atomic_and(volatile unsigned int *p, unsigned int val); +@@ -13309,9 +13329,11 @@ unsigned long __ovld atom_and(volatile __local unsigned long *p, unsigned long v + * pointed by p. The function returns old. + */ + int __ovld atomic_or(volatile __global int *p, int val); +-unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_or(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_or(volatile __local int *p, int val); +-unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_or(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_or(volatile int *p, int val); + unsigned int __ovld atomic_or(volatile unsigned int *p, unsigned int val); +@@ -13340,9 +13362,11 @@ unsigned long __ovld atom_or(volatile __local unsigned long *p, unsigned long va + * pointed by p. The function returns old. 
+ */ + int __ovld atomic_xor(volatile __global int *p, int val); +-unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_xor(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_xor(volatile __local int *p, int val); +-unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_xor(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_xor(volatile int *p, int val); + unsigned int __ovld atomic_xor(volatile unsigned int *p, unsigned int val); +@@ -13380,120 +13404,78 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v + #endif + + // atomic_init() ++#ifdef __opencl_c_generic_address_space + void __ovld atomic_init(volatile atomic_int *object, int value); + void __ovld atomic_init(volatile atomic_uint *object, uint value); + void __ovld atomic_init(volatile atomic_float *object, float value); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) + void __ovld atomic_init(volatile atomic_long *object, long value); + void __ovld atomic_init(volatile atomic_ulong *object, ulong value); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld atomic_init(volatile atomic_double *object, double value); +-#endif //cl_khr_fp64 +-#endif ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++void __ovld atomic_init(volatile atomic_int __global *object, int value); ++void __ovld atomic_init(volatile atomic_int __local *object, int value); ++void __ovld atomic_init(volatile atomic_uint __global *object, uint value); ++void __ovld atomic_init(volatile atomic_uint __local *object, uint value); ++void __ovld 
atomic_init(volatile atomic_float __global *object, float value); ++void __ovld atomic_init(volatile atomic_float __local *object, float value); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++void __ovld atomic_init(volatile atomic_long __global *object, long value); ++void __ovld atomic_init(volatile atomic_long __local *object, long value); ++void __ovld atomic_init(volatile atomic_ulong __global *object, ulong value); ++void __ovld atomic_init(volatile atomic_ulong __local *object, ulong value); ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++void __ovld atomic_init(volatile atomic_double __global *object, double value); ++void __ovld atomic_init(volatile atomic_double __local *object, double value); ++#endif // cl_khr_fp64 ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_work_item_fence() +-void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope); ++void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, ++ memory_scope scope); + + // atomic_fetch() +- ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++#ifdef __opencl_c_generic_address_space + int __ovld atomic_fetch_add(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_sub(volatile atomic_int *object, 
int operand); +-int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_or(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_and(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, 
memory_order order, memory_scope scope); + uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_min(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); +-uint __ovld atomic_fetch_min(volatile atomic_uint *object, int operand); +-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order); +-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_max(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); +-uint __ovld atomic_fetch_max(volatile atomic_uint *object, int operand); +-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint 
*object, int operand, memory_order order); +-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope); + + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) + long __ovld atomic_fetch_add(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_or(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld 
atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_and(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_min(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); +-ulong __ovld 
atomic_fetch_min(volatile atomic_ulong *object, long operand); +-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order); +-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_max(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); +-ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, long operand); +-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order); +-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope); + #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) + + // OpenCL v2.0 s6.13.11.7.5: +@@ -13501,196 +13483,2239 @@ ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long opera + // or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. 
+ + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand); +-uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); +-uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand); +-uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); +- +-uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand); +-uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); +-uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand); +-uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); +-uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand); +-uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); +-uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t opermax); +-uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t 
*object, intptr_t opermax, memory_order minder); +-uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope); +-uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t opermax); +-uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder); +-uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope); +- +-intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand); +-intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); +-intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); +-intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand); +-intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); +-intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); +-intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand); +-intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); +-intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); +-intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t opermax); +-intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder); +-intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); +-intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t 
opermax); +-intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder); +-intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); ++uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand); ++uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand); ++ ++uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, ++ intptr_t opermax); ++uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, ++ intptr_t opermax); ++ ++intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, ++ uintptr_t opermax); ++intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, ++ uintptr_t opermax); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if(__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++int __ovld atomic_fetch_add(volatile atomic_int __global *object, int operand); ++uint __ovld atomic_fetch_add(volatile atomic_uint __local *object, ++ uint operand); ++int __ovld atomic_fetch_sub(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_sub(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_sub(volatile atomic_uint __local *object, ++ uint operand); ++uint 
__ovld atomic_fetch_sub(volatile atomic_uint __global *object, ++ uint operand); ++int __ovld atomic_fetch_or(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_or(volatile atomic_int __local *object, ++ int operand); /* __local overload; the upstream patch repeated atomic_fetch_sub(atomic_uint __local *) here */ ++uint __ovld atomic_fetch_or(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_or(volatile atomic_uint __local *object, uint operand); ++int __ovld atomic_fetch_xor(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_xor(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_xor(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_xor(volatile atomic_uint __local *object, ++ uint operand); ++int __ovld atomic_fetch_and(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_and(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_and(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_and(volatile atomic_uint __local *object, ++ uint operand); ++int __ovld atomic_fetch_min(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_min(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_min(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_min(volatile atomic_uint __local *object, ++ uint operand); ++int __ovld atomic_fetch_max(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_max(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_max(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_max(volatile atomic_uint __local *object, ++ uint operand); ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++long __ovld atomic_fetch_add(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_add(volatile atomic_long
__local *object, ++ long operand); ++ulong __ovld atomic_fetch_add(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_add(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_sub(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_sub(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_sub(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_sub(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_or(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_or(volatile atomic_long __local *object, long operand); ++ulong __ovld atomic_fetch_or(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_or(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_xor(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_xor(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_xor(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_xor(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_and(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_and(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_and(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_and(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_min(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_min(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_min(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_min(volatile atomic_ulong __local *object, ++ ulong operand); ++long 
__ovld atomic_fetch_max(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_max(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_max(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_max(volatile atomic_ulong __local *object, ++ ulong operand); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++ ++// OpenCL v2.0 s6.13.11.7.5: ++// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument ++// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be ++// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t __global *object, ++ ptrdiff_t operand); ++uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t __local *object, ++ ptrdiff_t operand); ++uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t __global *object, ++ ptrdiff_t operand); ++uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t __local *object, ++ ptrdiff_t operand); ++ ++uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t __global *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t __local *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t __global *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t __local *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t __global *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t __local *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t __global *object, ++ intptr_t opermax); ++uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t __local *object, ++ 
intptr_t opermax); ++uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t __global *object, ++ intptr_t opermax); ++uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t __local *object, ++ intptr_t opermax); ++ ++intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t __global *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t __local *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t __global *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t __local *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t __global *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t __local *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t __global *object, ++ uintptr_t opermax); ++intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t __local *object, ++ uintptr_t opermax); ++intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t __global *object, ++ uintptr_t opermax); ++intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t __local *object, ++ uintptr_t opermax); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int 
__ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, ++ memory_order order); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, ++ memory_order order, memory_scope scope); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint 
__ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, ++ memory_order order); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, ++ long operand, memory_order order); 
++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, ++ long operand, 
memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++ ++// OpenCL v2.0 s6.13.11.7.5: ++// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument ++// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be ++// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#ifdef __opencl_c_atomic_scope_device ++uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand, ++ memory_order order); ++uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand, ++ memory_order order); ++uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, ++ memory_order order); ++uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, ++ memory_order order); ++uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, ++ intptr_t opermax, ++ memory_order minder); ++uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, ++ intptr_t opermax, ++ memory_order minder); ++intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, ++ 
uintptr_t operand, ++ memory_order order); ++intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, ++ memory_order order); ++intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, ++ uintptr_t opermax, ++ memory_order minder); ++intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, ++ uintptr_t opermax, ++ memory_order minder); ++#endif // __opencl_c_atomic_scope_device ++uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand, ++ memory_order order, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand, ++ memory_order order, ++ memory_scope scope); ++ ++uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, memory_order order, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, memory_order order, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, memory_order order, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, ++ intptr_t opermax, ++ memory_order minder, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, ++ intptr_t opermax, ++ memory_order minder, ++ memory_scope scope); ++ ++intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, memory_order order, ++ memory_scope scope); ++intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, memory_order order, ++ memory_scope scope); ++intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, memory_order order, ++ memory_scope scope); ++intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t 
*object, ++ uintptr_t opermax, ++ memory_order minder, ++ memory_scope scope); ++intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, ++ uintptr_t opermax, ++ memory_order minder, ++ memory_scope scope); + #endif ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_fetch_add_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_add_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld 
atomic_fetch_xor_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_fetch_add_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_add_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __global *object, ++ 
uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld 
atomic_fetch_and_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_fetch_add_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_add_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld 
atomic_fetch_add_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long 
__ovld atomic_fetch_min_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_fetch_add_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_add_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld 
atomic_fetch_sub_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld 
atomic_fetch_min_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++ ++// OpenCL v2.0 s6.13.11.7.5: ++// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument ++// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be ++// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. 
++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#ifdef __opencl_c_atomic_scope_device // no memory_scope parameter ++uintptr_t __ovld ++atomic_fetch_add_explicit(volatile atomic_uintptr_t __global *object, ++ ptrdiff_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_add_explicit(volatile atomic_uintptr_t __local *object, ++ ptrdiff_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_sub_explicit(volatile atomic_uintptr_t __global *object, ++ ptrdiff_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_sub_explicit(volatile atomic_uintptr_t __local *object, ++ ptrdiff_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_or_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_or_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_xor_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_xor_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_and_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_and_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_min_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_min_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_max_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_max_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t operand, memory_order order); ++intptr_t 
__ovld ++atomic_fetch_or_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_or_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_xor_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_xor_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_and_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_and_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_min_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_min_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_max_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_max_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++uintptr_t __ovld atomic_fetch_add_explicit( ++ volatile atomic_uintptr_t __global *object, ptrdiff_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_add_explicit( ++ volatile atomic_uintptr_t __local *object, ptrdiff_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_sub_explicit( ++ volatile atomic_uintptr_t __global *object, ptrdiff_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_sub_explicit( ++ volatile atomic_uintptr_t __local *object, ptrdiff_t operand, ++ memory_order order, memory_scope scope); ++ ++uintptr_t __ovld 
atomic_fetch_or_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_or_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_xor_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_xor_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_and_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_and_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_min_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_min_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_max_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_max_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++ ++intptr_t __ovld atomic_fetch_or_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_or_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_xor_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t operand, ++ memory_order order, memory_scope scope); 
++intptr_t __ovld atomic_fetch_xor_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_and_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_and_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_min_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_min_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_max_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_max_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_store() + ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++ ++#ifdef __opencl_c_generic_address_space + void __ovld atomic_store(volatile atomic_int *object, int desired); +-void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); + void __ovld atomic_store(volatile atomic_uint *object, uint desired); +-void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); + void __ovld 
atomic_store(volatile atomic_float *object, float desired); +-void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld atomic_store(volatile atomic_double *object, double desired); +-void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); +-#endif //cl_khr_fp64 ++#endif + void __ovld atomic_store(volatile atomic_long *object, long desired); +-void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); + void __ovld atomic_store(volatile atomic_ulong *object, ulong desired); +-void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++void __ovld atomic_store(volatile atomic_int __global *object, int desired); ++void __ovld atomic_store(volatile atomic_int __local *object, int desired); ++void __ovld atomic_store(volatile atomic_uint __global *object, uint desired); ++void __ovld atomic_store(volatile atomic_uint __local *object, uint desired); ++void __ovld atomic_store(volatile atomic_float __global *object, float desired); ++void __ovld 
atomic_store(volatile atomic_float __local *object, float desired); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++void __ovld atomic_store(volatile atomic_double __global *object, ++ double desired); ++void __ovld atomic_store(volatile atomic_double __local *object, ++ double desired); + #endif ++void __ovld atomic_store(volatile atomic_long __global *object, long desired); ++void __ovld atomic_store(volatile atomic_long __local *object, long desired); ++void __ovld atomic_store(volatile atomic_ulong __global *object, ulong desired); ++void __ovld atomic_store(volatile atomic_ulong __local *object, ulong desired); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, ++ memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, ++ memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, ++ memory_order order, memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, ++ memory_order order, memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, ++ memory_order order, memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld 
atomic_store_explicit(volatile atomic_double *object, ++ double desired, memory_order order); ++#endif //__opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_double *object, ++ double desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, ++ memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, ++ memory_order order); ++#endif //__opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, ++ memory_order order, memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_int __global *object, ++ int desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_int __local *object, ++ int desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_uint __global *object, ++ uint desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_uint __local *object, ++ uint desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_float __global *object, ++ float desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_float __local *object, ++ float desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_int __global *object, ++ int desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile 
atomic_int __local *object, ++ int desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_uint __global *object, ++ uint desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_uint __local *object, ++ uint desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_float __global *object, ++ float desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_float __local *object, ++ float desired, memory_order order, ++ memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_double __global *object, ++ double desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_double __local *object, ++ double desired, memory_order order); ++#endif //__opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_double __global *object, ++ double desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_double __local *object, ++ double desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_long __global *object, ++ long desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_long __local *object, ++ long desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_ulong __global *object, ++ ulong desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_ulong __local *object, ++ ulong desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_long __global 
*object, ++ long desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_long __local *object, ++ long desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_ulong __global *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_ulong __local *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_load() +- ++#ifdef __opencl_c_generic_address_space ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) + int __ovld atomic_load(volatile atomic_int *object); +-int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order); +-int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope); + uint __ovld atomic_load(volatile atomic_uint *object); +-uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order); +-uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope); + float __ovld atomic_load(volatile atomic_float *object); +-float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order); +-float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld atomic_load(volatile atomic_double *object); +-double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order); +-double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope); +-#endif //cl_khr_fp64 ++#endif // 
cl_khr_fp64 + long __ovld atomic_load(volatile atomic_long *object); +-long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order); +-long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope); + ulong __ovld atomic_load(volatile atomic_ulong *object); +-ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order); +-ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope); +-#endif ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif //__opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++int __ovld atomic_load(volatile atomic_int __global *object); ++int __ovld atomic_load(volatile atomic_int __local *object); ++uint __ovld atomic_load(volatile atomic_uint __global *object); ++uint __ovld atomic_load(volatile atomic_uint __local *object); ++float __ovld atomic_load(volatile atomic_float __global *object); ++float __ovld atomic_load(volatile atomic_float __local *object); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++double __ovld atomic_load(volatile atomic_double __global *object); ++double __ovld atomic_load(volatile atomic_double __local *object); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++long __ovld atomic_load(volatile atomic_long __global *object); ++long __ovld atomic_load(volatile atomic_long __local *object); ++ulong __ovld atomic_load(volatile atomic_ulong __global *object); ++ulong __ovld atomic_load(volatile atomic_ulong __local *object); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // 
defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_load_explicit(volatile atomic_int *object, ++ memory_order order); ++uint __ovld atomic_load_explicit(volatile atomic_uint *object, ++ memory_order order); ++float __ovld atomic_load_explicit(volatile atomic_float *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_load_explicit(volatile atomic_uint *object, ++ memory_order order, memory_scope scope); ++float __ovld atomic_load_explicit(volatile atomic_float *object, ++ memory_order order, memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++double __ovld atomic_load_explicit(volatile atomic_double *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++double __ovld atomic_load_explicit(volatile atomic_double *object, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_load_explicit(volatile atomic_long *object, ++ memory_order order); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, ++ memory_order order); ++#endif //__opencl_c_atomic_scope_device ++long __ovld atomic_load_explicit(volatile atomic_long *object, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // 
__opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_load_explicit(volatile atomic_int __global *object, ++ memory_order order); ++int __ovld atomic_load_explicit(volatile atomic_int __local *object, ++ memory_order order); ++uint __ovld atomic_load_explicit(volatile atomic_uint __global *object, ++ memory_order order); ++uint __ovld atomic_load_explicit(volatile atomic_uint __local *object, ++ memory_order order); ++float __ovld atomic_load_explicit(volatile atomic_float __global *object, ++ memory_order order); ++float __ovld atomic_load_explicit(volatile atomic_float __local *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_load_explicit(volatile atomic_int __global *object, ++ memory_order order, memory_scope scope); ++int __ovld atomic_load_explicit(volatile atomic_int __local *object, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_load_explicit(volatile atomic_uint __global *object, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_load_explicit(volatile atomic_uint __local *object, ++ memory_order order, memory_scope scope); ++float __ovld atomic_load_explicit(volatile atomic_float __global *object, ++ memory_order order, memory_scope scope); ++float __ovld atomic_load_explicit(volatile atomic_float __local *object, ++ memory_order order, memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++double __ovld atomic_load_explicit(volatile atomic_double __global *object, ++ memory_order order); ++double __ovld atomic_load_explicit(volatile atomic_double __local *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++double __ovld atomic_load_explicit(volatile atomic_double __global *object, ++ memory_order order, memory_scope 
scope); ++double __ovld atomic_load_explicit(volatile atomic_double __local *object, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_load_explicit(volatile atomic_long __global *object, ++ memory_order order); ++long __ovld atomic_load_explicit(volatile atomic_long __local *object, ++ memory_order order); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong __global *object, ++ memory_order order); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong __local *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_load_explicit(volatile atomic_long __global *object, ++ memory_order order, memory_scope scope); ++long __ovld atomic_load_explicit(volatile atomic_long __local *object, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong __global *object, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong __local *object, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_exchange() +- ++#ifdef __opencl_c_generic_address_space ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) + int __ovld atomic_exchange(volatile atomic_int *object, int desired); +-int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order); +-int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); + uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired); +-uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order); +-uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, 
uint desired, memory_order order, memory_scope scope); + float __ovld atomic_exchange(volatile atomic_float *object, float desired); +-float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order); +-float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld atomic_exchange(volatile atomic_double *object, double desired); +-double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order); +-double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); +-#endif //cl_khr_fp64 ++#endif + long __ovld atomic_exchange(volatile atomic_long *object, long desired); +-long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order); +-long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); + ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired); +-ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order); +-ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++int __ovld atomic_exchange(volatile atomic_int __global *object, int desired); ++int __ovld atomic_exchange(volatile 
atomic_int __local *object, int desired); ++uint __ovld atomic_exchange(volatile atomic_uint __global *object, ++ uint desired); ++uint __ovld atomic_exchange(volatile atomic_uint __local *object, uint desired); ++float __ovld atomic_exchange(volatile atomic_float __global *object, ++ float desired); ++float __ovld atomic_exchange(volatile atomic_float __local *object, ++ float desired); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++double __ovld atomic_exchange(volatile atomic_double __global *object, ++ double desired); ++double __ovld atomic_exchange(volatile atomic_double __local *object, ++ double desired); + #endif ++long __ovld atomic_exchange(volatile atomic_long __global *object, ++ long desired); ++long __ovld atomic_exchange(volatile atomic_long __local *object, long desired); ++ulong __ovld atomic_exchange(volatile atomic_ulong __global *object, ++ ulong desired); ++ulong __ovld atomic_exchange(volatile atomic_ulong __local *object, ++ ulong desired); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, ++ memory_order order); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, ++ memory_order order); ++float __ovld atomic_exchange_explicit(volatile atomic_float *object, ++ float desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, ++ memory_order 
order, memory_scope scope); ++float __ovld atomic_exchange_explicit(volatile atomic_float *object, ++ float desired, memory_order order, ++ memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++double __ovld atomic_exchange_explicit(volatile atomic_double *object, ++ double desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++double __ovld atomic_exchange_explicit(volatile atomic_double *object, ++ double desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, ++ memory_order order); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ++ ulong desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_exchange_explicit(volatile atomic_int __global *object, ++ int desired, memory_order order); ++int __ovld atomic_exchange_explicit(volatile atomic_int __local *object, ++ int desired, memory_order order); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint __global *object, ++ uint desired, memory_order order); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint __local *object, ++ uint desired, memory_order order); ++float __ovld atomic_exchange_explicit(volatile atomic_float 
__global *object, ++ float desired, memory_order order); ++float __ovld atomic_exchange_explicit(volatile atomic_float __local *object, ++ float desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_exchange_explicit(volatile atomic_int __global *object, ++ int desired, memory_order order, ++ memory_scope scope); ++int __ovld atomic_exchange_explicit(volatile atomic_int __local *object, ++ int desired, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint __global *object, ++ uint desired, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint __local *object, ++ uint desired, memory_order order, ++ memory_scope scope); ++float __ovld atomic_exchange_explicit(volatile atomic_float __global *object, ++ float desired, memory_order order, ++ memory_scope scope); ++float __ovld atomic_exchange_explicit(volatile atomic_float __local *object, ++ float desired, memory_order order, ++ memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++double __ovld atomic_exchange_explicit(volatile atomic_double __global *object, ++ double desired, memory_order order); ++double __ovld atomic_exchange_explicit(volatile atomic_double __local *object, ++ double desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++double __ovld atomic_exchange_explicit(volatile atomic_double __global *object, ++ double desired, memory_order order, ++ memory_scope scope); ++double __ovld atomic_exchange_explicit(volatile atomic_double __local *object, ++ double desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_exchange_explicit(volatile atomic_long __global *object, ++ long desired, 
memory_order order); ++long __ovld atomic_exchange_explicit(volatile atomic_long __local *object, ++ long desired, memory_order order); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __global *object, ++ ulong desired, memory_order order); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __local *object, ++ ulong desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_exchange_explicit(volatile atomic_long __global *object, ++ long desired, memory_order order, ++ memory_scope scope); ++long __ovld atomic_exchange_explicit(volatile atomic_long __local *object, ++ long desired, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __global *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __local *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_compare_exchange_strong() and atomic_compare_exchange_weak() +- +-bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, +- int desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, +- int desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected, +- uint desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint 
*expected, +- uint desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, +- int desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, +- int desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, +- uint desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, +- uint desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected, +- float desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected, +- float desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, +- float desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, +- float desired, memory_order success, memory_order failure, memory_scope scope); ++#ifdef __opencl_c_generic_address_space ++#if defined(__opencl_c_atomic_scope_device) && \ ++ 
defined(__opencl_c_atomic_order_seq_cst) ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, ++ int *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, ++ uint *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, ++ int *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, ++ uint *expected, uint desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, ++ float *expected, float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, ++ float *expected, float desired); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, ++ double *expected, double desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, ++ double *expected, double desired); ++#endif ++bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, ++ long *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, ++ long *expected, long desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ++ ulong *expected, ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ++ ulong *expected, ulong desired); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, ++ int __global *expected, 
int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, ++ int __global *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, ++ int __local *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, ++ int __local *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, ++ int __private *expected, ++ int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, ++ int __private *expected, ++ int desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_uint __global *object, ++ uint __global *expected, uint desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, ++ uint __global *expected, ++ uint desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_uint __global *object, ++ uint __local *expected, uint desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, ++ uint __local *expected, ++ uint desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_uint __global *object, ++ uint __private *expected, uint desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, ++ uint __private *expected, ++ uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, ++ int __global *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, ++ int __global *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, ++ int __local *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, ++ int __local *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, ++ int __private 
*expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, ++ int __private *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, ++ uint __global *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, ++ uint __global *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, ++ uint __local *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, ++ uint __local *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, ++ uint __private *expected, ++ uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, ++ uint __private *expected, ++ uint desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __global *object, ++ float __global *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __local *object, ++ float __global *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __global *object, ++ float __local *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __local *object, ++ float __local *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __global *object, ++ float __private *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __local *object, ++ float __private *expected, float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, ++ float __global *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, ++ float __global *expected, ++ float desired); ++bool __ovld 
atomic_compare_exchange_weak(volatile atomic_float __global *object, ++ float __local *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, ++ float __local *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, ++ float __private *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, ++ float __private *expected, ++ float desired); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-#ifdef cl_khr_fp64 +-bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, +- double desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, +- double desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, +- double desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, +- double desired, memory_order success, memory_order failure, memory_scope scope); +-#endif //cl_khr_fp64 +-bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, +- long desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, +- long desired, memory_order success, memory_order failure, 
memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, +- long desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, +- long desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, +- ulong desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, +- ulong desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected, +- ulong desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected, +- ulong desired, memory_order success, memory_order failure, memory_scope scope); ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __global *object, ++ double __global *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __local *object, ++ double __global *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __global *object, ++ double __local *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __local *object, ++ double __local *expected, double desired); ++bool __ovld 
++atomic_compare_exchange_strong(volatile atomic_double __global *object, ++ double __private *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __local *object, ++ double __private *expected, double desired); ++bool __ovld ++atomic_compare_exchange_weak(volatile atomic_double __global *object, ++ double __global *expected, double desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, ++ double __global *expected, ++ double desired); ++bool __ovld ++atomic_compare_exchange_weak(volatile atomic_double __global *object, ++ double __local *expected, double desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, ++ double __local *expected, ++ double desired); ++bool __ovld ++atomic_compare_exchange_weak(volatile atomic_double __global *object, ++ double __private *expected, double desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, ++ double __private *expected, ++ double desired); + #endif ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_long __global *object, ++ long __global *expected, long desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, ++ long __global *expected, ++ long desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_long __global *object, ++ long __local *expected, long desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, ++ long __local *expected, ++ long desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_long __global *object, ++ long __private *expected, long desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, ++ long __private *expected, ++ long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, ++ long __global *expected, long desired); ++bool __ovld 
atomic_compare_exchange_weak(volatile atomic_long __local *object, ++ long __global *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, ++ long __local *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, ++ long __local *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, ++ long __private *expected, ++ long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, ++ long __private *expected, ++ long desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __global *object, ++ ulong __global *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __local *object, ++ ulong __global *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __global *object, ++ ulong __local *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __local *object, ++ ulong __local *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __global *object, ++ ulong __private *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __local *object, ++ ulong __private *expected, ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, ++ ulong __global *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, ++ ulong __global *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, ++ ulong __local *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, ++ ulong __local *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global 
*object, ++ ulong __private *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, ++ ulong __private *expected, ++ ulong desired); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, ++ int *expected, int desired, ++ memory_order success, ++ memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint *object, uint *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, ++ int *expected, int desired, ++ memory_order success, ++ memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, ++ uint *expected, uint desired, ++ memory_order success, ++ memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float *object, float *expected, float desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, ++ float *expected, ++ float desired, ++ memory_order success, ++ memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, ++ int *expected, int desired, ++ memory_order success, ++ memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint *object, uint *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld 
atomic_compare_exchange_weak_explicit(volatile atomic_int *object, ++ int *expected, int desired, ++ memory_order success, ++ memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, ++ uint *expected, uint desired, ++ memory_order success, ++ memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float *object, float *expected, float desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float *object, float *expected, float desired, ++ memory_order success, memory_order failure, memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double *object, double *expected, double desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double *object, double *expected, double desired, ++ memory_order success, memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double *object, double *expected, double desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double *object, double *expected, double desired, ++ memory_order success, memory_order failure, memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long *object, long *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld 
atomic_compare_exchange_weak_explicit(volatile atomic_long *object, ++ long *expected, long desired, ++ memory_order success, ++ memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong *object, ulong *expected, ulong desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ++ ulong *expected, ++ ulong desired, ++ memory_order success, ++ memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long *object, long *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, ++ long *expected, long desired, ++ memory_order success, ++ memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong *object, ulong *expected, ulong desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong *object, ulong *expected, ulong desired, ++ memory_order success, memory_order failure, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __global *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __global *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __local *expected, 
int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __local *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __private *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __private *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __global *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __global *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __global *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __global *expected, 
int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __local *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __local *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __private *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __private *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __global *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __global *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __global *expected, ++ float 
desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __global *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __local *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __local *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __private *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __private *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __global *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __global *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __local *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __local *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __private *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local 
*object, float __private *expected, ++ float desired, memory_order success, memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __global *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __global *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __local *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __local *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __private *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __private *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __global *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __global *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld 
atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __global *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __global *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __local *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __local *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __private *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __private *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __global *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); 
++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __global *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __global *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __global *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __local *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __local *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __private *expected, ++ float desired, 
memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __private *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __global *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __global *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __local *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __local *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __private *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __private *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __global *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double 
__global *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __local *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __local *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __private *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __private *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __global *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __global *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __local *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __local *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __private *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __private *expected, ++ double desired, memory_order success, memory_order failure); ++#endif // 
__opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __global *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __global *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __local *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __local *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __private *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __private *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __global *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __global *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __local *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ 
volatile atomic_double __local *object, double __local *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __private *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __private *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __global *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __global *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __local *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __local *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __private *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __private *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __global *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld 
atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __global *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __local *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __local *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __private *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __private *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __private *expected, ++ ulong desired, memory_order success, 
memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __global *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __global *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __local *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __local *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool 
__ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __private *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __private *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __global *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __global *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __local *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __local *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __private *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __private *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __global *expected, ++ ulong desired, memory_order success, 
memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong 
__local *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_flag_test_and_set() and atomic_flag_clear() +- ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++#ifdef __opencl_c_generic_address_space + bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object); +-bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order); +-bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); + void __ovld atomic_flag_clear(volatile atomic_flag *object); +-void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order); +-void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++bool __ovld atomic_flag_test_and_set(volatile atomic_flag __global *object); ++bool __ovld atomic_flag_test_and_set(volatile atomic_flag __local *object); ++void __ovld atomic_flag_clear(volatile atomic_flag __global *object); ++void __ovld atomic_flag_clear(volatile atomic_flag __local *object); ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, ++ memory_order order); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, ++ memory_order 
order, ++ memory_scope scope); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, ++ memory_order order, memory_scope scope); ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_flag_test_and_set_explicit( ++ volatile atomic_flag __global *object, memory_order order); ++bool __ovld atomic_flag_test_and_set_explicit( ++ volatile atomic_flag __local *object, memory_order order); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag __global *object, ++ memory_order order); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag __local *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld ++atomic_flag_test_and_set_explicit(volatile atomic_flag __global *object, ++ memory_order order, memory_scope scope); ++bool __ovld ++atomic_flag_test_and_set_explicit(volatile atomic_flag __local *object, ++ memory_order order, memory_scope scope); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag __global *object, ++ memory_order order, memory_scope scope); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag __local *object, ++ memory_order order, memory_scope scope); ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + +@@ -13918,7 +15943,7 @@ float16 __ovld __cnfn shuffle(float4 x, uint16 mask); + float16 __ovld __cnfn shuffle(float8 x, uint16 mask); + float16 __ovld __cnfn shuffle(float16 x, uint16 mask); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld __cnfn shuffle(double2 x, ulong2 mask); + double2 __ovld __cnfn shuffle(double4 x, ulong2 mask); + double2 __ovld __cnfn shuffle(double8 x, ulong2 mask); +@@ -13938,7 +15963,7 @@ double16 __ovld __cnfn shuffle(double2 x, ulong16 mask); + double16 __ovld __cnfn shuffle(double4 x, ulong16 mask); + 
double16 __ovld __cnfn shuffle(double8 x, ulong16 mask); + double16 __ovld __cnfn shuffle(double16 x, ulong16 mask); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half2 __ovld __cnfn shuffle(half2 x, ushort2 mask); +@@ -14142,7 +16167,7 @@ float16 __ovld __cnfn shuffle2(float4 x, float4 y, uint16 mask); + float16 __ovld __cnfn shuffle2(float8 x, float8 y, uint16 mask); + float16 __ovld __cnfn shuffle2(float16 x, float16 y, uint16 mask); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld __cnfn shuffle2(double2 x, double2 y, ulong2 mask); + double2 __ovld __cnfn shuffle2(double4 x, double4 y, ulong2 mask); + double2 __ovld __cnfn shuffle2(double8 x, double8 y, ulong2 mask); +@@ -14162,7 +16187,7 @@ double16 __ovld __cnfn shuffle2(double2 x, double2 y, ulong16 mask); + double16 __ovld __cnfn shuffle2(double4 x, double4 y, ulong16 mask); + double16 __ovld __cnfn shuffle2(double8 x, double8 y, ulong16 mask); + double16 __ovld __cnfn shuffle2(double16 x, double16 y, ulong16 mask); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half2 __ovld __cnfn shuffle2(half2 x, half2 y, ushort2 mask); +@@ -14198,6 +16223,7 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))) + #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable + #endif //cl_khr_gl_msaa_sharing + ++#ifdef __opencl_c_images + /** + * Use the coordinate (coord.xy) to do an element lookup in + * the 2D image object specified by image. 
+@@ -14476,6 +16502,7 @@ half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord); + + // Image read functions for read_write images + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord); + int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord); + uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord); +@@ -14519,6 +16546,7 @@ float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, i + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + #ifdef cl_khr_mipmap_image + float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod); + int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod); +@@ -14569,6 +16597,7 @@ int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, + uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY); + + #endif //cl_khr_mipmap_image ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // Image read functions returning half4 type +@@ -14580,6 +16609,7 @@ half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord); + half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord); + half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord); + #endif //cl_khr_fp16 ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -14669,7 +16699,7 @@ void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, flo + 
void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color); + void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color); + void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color); + void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color); +@@ -14702,7 +16732,7 @@ void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, in + void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float depth); + void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float depth); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color); + void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 color); + void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color); +@@ -14715,7 +16745,7 @@ void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 + #ifdef cl_khr_fp16 + void __ovld write_imageh(write_only image1d_t image, int coord, half4 color); + void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 color); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color); + #endif + void __ovld write_imageh(write_only image1d_array_t image, int2 coord, half4 color); +@@ -14725,6 +16755,7 @@ void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 col + + // Image write functions for read_write images + #if defined(__OPENCL_CPP_VERSION__) || 
(__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color); + void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color); + void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color); +@@ -14745,7 +16776,7 @@ void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, flo + void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int4 color); + void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, uint4 color); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imagef(read_write image3d_t image, int4 coord, float4 color); + void __ovld write_imagei(read_write image3d_t image, int4 coord, int4 color); + void __ovld write_imageui(read_write image3d_t image, int4 coord, uint4 color); +@@ -14777,7 +16808,7 @@ void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, in + void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, int lod, float color); + void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, int lod, float color); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imagef(read_write image3d_t image, int4 coord, int lod, float4 color); + void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 color); + void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color); +@@ -14790,13 +16821,14 @@ void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 + #ifdef cl_khr_fp16 + void __ovld write_imageh(read_write image1d_t image, int coord, half4 color); + void __ovld write_imageh(read_write image2d_t image, int2 coord, half4 color); +-#ifdef cl_khr_3d_image_writes ++#if 
defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imageh(read_write image3d_t image, int4 coord, half4 color); + #endif + void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 color); + void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color); + void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color); + #endif //cl_khr_fp16 ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have +@@ -14810,7 +16842,7 @@ void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 col + int __ovld __cnfn get_image_width(read_only image1d_t image); + int __ovld __cnfn get_image_width(read_only image1d_buffer_t image); + int __ovld __cnfn get_image_width(read_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_width(read_only image3d_t image); + #endif + int __ovld __cnfn get_image_width(read_only image1d_array_t image); +@@ -14829,7 +16861,7 @@ int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image); + int __ovld __cnfn get_image_width(write_only image1d_t image); + int __ovld __cnfn get_image_width(write_only image1d_buffer_t image); + int __ovld __cnfn get_image_width(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_width(write_only image3d_t image); + #endif + int __ovld __cnfn get_image_width(write_only image1d_array_t image); +@@ -14846,6 +16878,7 @@ int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= 
CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_width(read_write image1d_t image); + int __ovld __cnfn get_image_width(read_write image1d_buffer_t image); + int __ovld __cnfn get_image_width(read_write image2d_t image); +@@ -14862,6 +16895,7 @@ int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image); + int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image); + int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -14882,7 +16916,7 @@ int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + + int __ovld __cnfn get_image_height(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_height(write_only image3d_t image); + #endif + int __ovld __cnfn get_image_height(write_only image2d_array_t image); +@@ -14898,6 +16932,7 @@ int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_height(read_write image2d_t image); + int __ovld __cnfn get_image_height(read_write image3d_t image); + int __ovld __cnfn get_image_height(read_write image2d_array_t image); +@@ -14911,6 +16946,7 @@ int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image); + int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image); + int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || 
(__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -14918,12 +16954,14 @@ int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image); + */ + int __ovld __cnfn get_image_depth(read_only image3d_t image); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_depth(write_only image3d_t image); + #endif + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_depth(read_write image3d_t image); ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // OpenCL Extension v2.0 s9.18 - Mipmaps +@@ -14939,13 +16977,15 @@ int __ovld get_image_num_mip_levels(read_only image3d_t image); + + int __ovld get_image_num_mip_levels(write_only image1d_t image); + int __ovld get_image_num_mip_levels(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld get_image_num_mip_levels(write_only image3d_t image); + #endif + ++#ifdef __opencl_c_read_write_images + int __ovld get_image_num_mip_levels(read_write image1d_t image); + int __ovld get_image_num_mip_levels(read_write image2d_t image); + int __ovld get_image_num_mip_levels(read_write image3d_t image); ++#endif //__opencl_c_read_write_images + + int __ovld get_image_num_mip_levels(read_only image1d_array_t image); + int __ovld get_image_num_mip_levels(read_only image2d_array_t image); +@@ -14957,10 +16997,12 @@ int __ovld get_image_num_mip_levels(write_only image2d_array_t image); + int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image); + int __ovld get_image_num_mip_levels(write_only image2d_depth_t image); + ++#ifdef __opencl_c_read_write_images + int __ovld get_image_num_mip_levels(read_write image1d_array_t image); + int __ovld 
get_image_num_mip_levels(read_write image2d_array_t image); + int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image); + int __ovld get_image_num_mip_levels(read_write image2d_depth_t image); ++#endif //__opencl_c_read_write_images + + #endif //cl_khr_mipmap_image + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +@@ -15004,7 +17046,7 @@ int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth + int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image); + int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image); + int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image); + #endif + int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image); +@@ -15021,6 +17063,7 @@ int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_dept + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image); + int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image); + int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image); +@@ -15037,6 +17080,7 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t im + int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image); + int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -15076,7 +17120,7 @@ int __ovld __cnfn 
get_image_channel_order(read_only image2d_array_msaa_depth_t i + int __ovld __cnfn get_image_channel_order(write_only image1d_t image); + int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image); + int __ovld __cnfn get_image_channel_order(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_channel_order(write_only image3d_t image); + #endif + int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image); +@@ -15093,6 +17137,7 @@ int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_channel_order(read_write image1d_t image); + int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image); + int __ovld __cnfn get_image_channel_order(read_write image2d_t image); +@@ -15109,6 +17154,7 @@ int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image) + int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image); + int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -15143,6 +17189,7 @@ int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int2 __ovld __cnfn get_image_dim(read_write image2d_t image); + int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image); + #ifdef cl_khr_depth_images +@@ -15155,6 +17202,7 @@ int2 __ovld __cnfn get_image_dim(read_write 
image2d_msaa_depth_t image); + int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image); + int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -15164,11 +17212,13 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); + * component and the w component is 0. + */ + int4 __ovld __cnfn get_image_dim(read_only image3d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int4 __ovld __cnfn get_image_dim(write_only image3d_t image); + #endif + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int4 __ovld __cnfn get_image_dim(read_write image3d_t image); ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -15196,6 +17246,7 @@ size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array); + size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array); + #ifdef cl_khr_depth_images +@@ -15205,6 +17256,7 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image + size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array); + size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array); + #endif //cl_khr_gl_msaa_sharing ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ 
-15222,16 +17274,21 @@ int __ovld get_image_num_samples(write_only image2d_array_msaa_t image); + int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image); + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld get_image_num_samples(read_write image2d_msaa_t image); + int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image); + int __ovld get_image_num_samples(read_write image2d_array_msaa_t image); + int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image); ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + #endif + ++#endif //__opencl_c_images ++ + // OpenCL v2.0 s6.13.15 - Work-group Functions + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_work_group_collective_functions + int __ovld __conv work_group_all(int predicate); + int __ovld __conv work_group_any(int predicate); + +@@ -15255,11 +17312,11 @@ ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z); + float __ovld __conv work_group_broadcast(float a, size_t local_id); + float __ovld __conv work_group_broadcast(float a, size_t x, size_t y); + float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __conv work_group_broadcast(double a, size_t local_id); + double __ovld __conv work_group_broadcast(double a, size_t x, size_t y); + double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half __ovld __conv work_group_reduce_add(half x); +@@ -15317,7 +17374,7 @@ float __ovld __conv work_group_scan_exclusive_max(float x); + float __ovld __conv 
work_group_scan_inclusive_add(float x); + float __ovld __conv work_group_scan_inclusive_min(float x); + float __ovld __conv work_group_scan_inclusive_max(float x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __conv work_group_reduce_add(double x); + double __ovld __conv work_group_reduce_min(double x); + double __ovld __conv work_group_reduce_max(double x); +@@ -15327,19 +17384,18 @@ double __ovld __conv work_group_scan_exclusive_max(double x); + double __ovld __conv work_group_scan_inclusive_add(double x); + double __ovld __conv work_group_scan_inclusive_min(double x); + double __ovld __conv work_group_scan_inclusive_max(double x); +-#endif //cl_khr_fp64 +- ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif //__opencl_c_work_group_collective_functions + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // OpenCL v2.0 s6.13.16 - Pipe Functions +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_pipes + bool __ovld is_valid_reserve_id(reserve_id_t reserve_id); +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +- ++#endif //__opencl_c_pipes + + // OpenCL v2.0 s6.13.17 - Enqueue Kernels + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +- ++#ifdef __opencl_c_device_enqueue + ndrange_t __ovld ndrange_1D(size_t); + ndrange_t __ovld ndrange_1D(size_t, size_t); + ndrange_t __ovld ndrange_1D(size_t, size_t, size_t); +@@ -15367,11 +17423,13 @@ bool __ovld is_valid_event (clk_event_t event); + void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value); + + queue_t __ovld get_default_queue(void); ++#endif //__opencl_c_device_enqueue + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // OpenCL Extension v2.0 s9.17 - Sub-groups + +-#if defined(cl_intel_subgroups) || 
defined(cl_khr_subgroups) ++#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \ ++ defined(__opencl_c_subgroups) + // Shared Sub Group Functions + uint __ovld get_sub_group_size(void); + uint __ovld get_max_sub_group_size(void); +@@ -15457,7 +17515,7 @@ half __ovld __conv sub_group_scan_inclusive_min(half x); + half __ovld __conv sub_group_scan_inclusive_max(half x); + #endif //cl_khr_fp16 + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id); + double __ovld __conv sub_group_reduce_add(double x); + double __ovld __conv sub_group_reduce_min(double x); +@@ -15468,7 +17526,7 @@ double __ovld __conv sub_group_scan_exclusive_max(double x); + double __ovld __conv sub_group_scan_inclusive_add(double x); + double __ovld __conv sub_group_scan_inclusive_min(double x); + double __ovld __conv sub_group_scan_inclusive_max(double x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #endif //cl_khr_subgroups cl_intel_subgroups + +@@ -15570,16 +17628,22 @@ uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c ); + long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c ); + ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c ); + ++#ifdef __opencl_c_images + uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord ); + uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord ); + uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord ); + uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord ); ++#endif //__opencl_c_images + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord); + uint2 __ovld 
__conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord); + uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord); + uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord); ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + uint __ovld __conv intel_sub_group_block_read( const __global uint* p ); +@@ -15587,16 +17651,22 @@ uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p ); + uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p ); + uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p ); + ++#ifdef __opencl_c_images + void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data); + void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data); + void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data); + void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data); ++#endif //__opencl_c_images + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data); + void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data); + void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data); + void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data); ++#endif // __opencl_c_read_write_images ++#endif // __opencl_c_images + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + void __ovld __conv intel_sub_group_block_write( __global 
uint* p, uint data ); +@@ -15611,7 +17681,7 @@ half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c ); + half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c ); + #endif + +-#if defined(cl_khr_fp64) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __conv intel_sub_group_shuffle( double x, uint c ); + double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c ); + double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c ); +@@ -15710,16 +17780,22 @@ ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x ); + short __ovld __conv intel_sub_group_scan_inclusive_max( short x ); + ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x ); + ++#ifdef __opencl_c_images + uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord ); + uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord ); + uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord ); + uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord ); ++#endif //__opencl_c_images + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord ); + uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord ); + uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord ); + uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord ); ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + uint __ovld __conv intel_sub_group_block_read_ui( const __global 
uint* p ); +@@ -15727,16 +17803,22 @@ uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p + uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p ); + uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p ); + ++#ifdef __opencl_c_images + void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data ); + void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data ); + void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data ); + void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data ); ++#endif //__opencl_c_images + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data ); + void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data ); + void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data ); + void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data ); ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data ); +@@ -15744,16 +17826,22 @@ void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint + void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data ); + void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data ); + ++#ifdef __opencl_c_images + ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t 
image, int2 coord ); + ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord ); + ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord ); + ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord ); ++#endif //__opencl_c_images + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord); + ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord); + ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord); + ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord); ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p ); +@@ -15761,16 +17849,22 @@ ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* + ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p ); + ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p ); + ++#ifdef __opencl_c_images + void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data); + void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data); + void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data); + void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data); ++#endif //__opencl_c_images + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef 
__opencl_c_read_write_images + void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data); + void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data); + void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data); + void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data); ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data ); +@@ -15889,6 +17983,7 @@ short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset( + short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size, + ushort2 image_size); + ++#ifdef __opencl_c_images + intel_sub_group_avc_ime_result_t __ovld + intel_sub_group_avc_ime_evaluate_with_single_reference( + read_only image2d_t src_image, read_only image2d_t ref_image, +@@ -15929,6 +18024,7 @@ intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout( + read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, + intel_sub_group_avc_ime_payload_t payload, + intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components); ++#endif //__opencl_c_images + + intel_sub_group_avc_ime_single_reference_streamin_t __ovld + intel_sub_group_avc_ime_get_single_reference_streamin( +@@ -15993,6 +18089,7 @@ intel_sub_group_avc_ref_payload_t __ovld + intel_sub_group_avc_ref_set_bilinear_filter_enable( + intel_sub_group_avc_ref_payload_t payload); + ++#ifdef __opencl_c_images + intel_sub_group_avc_ref_result_t __ovld + intel_sub_group_avc_ref_evaluate_with_single_reference( + read_only image2d_t src_image, read_only image2d_t ref_image, +@@ -16011,6 +18108,7 @@ intel_sub_group_avc_ref_evaluate_with_multi_reference( + read_only image2d_t src_image, uint packed_reference_ids, + 
uchar packed_reference_field_polarities, sampler_t vme_media_sampler, + intel_sub_group_avc_ref_payload_t payload); ++#endif //__opencl_c_images + + // SIC built-in functions + intel_sub_group_avc_sic_payload_t __ovld +@@ -16061,6 +18159,7 @@ intel_sub_group_avc_sic_set_block_based_raw_skip_sad( + uchar block_based_skip_type, + intel_sub_group_avc_sic_payload_t payload); + ++#ifdef __opencl_c_images + intel_sub_group_avc_sic_result_t __ovld + intel_sub_group_avc_sic_evaluate_ipe( + read_only image2d_t src_image, sampler_t vme_media_sampler, +@@ -16083,6 +18182,7 @@ intel_sub_group_avc_sic_evaluate_with_multi_reference( + read_only image2d_t src_image, uint packed_reference_ids, + uchar packed_reference_field_polarities, sampler_t vme_media_sampler, + intel_sub_group_avc_sic_payload_t payload); ++#endif //__opencl_c_images + + uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape( + intel_sub_group_avc_sic_result_t result); +diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp +index 6353e14bc41a..a8c216de6e04 100644 +--- a/clang/lib/Parse/ParseDecl.cpp ++++ b/clang/lib/Parse/ParseDecl.cpp +@@ -3904,8 +3904,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, + isInvalid = DS.SetTypeAltiVecBool(true, Loc, PrevSpec, DiagID, Policy); + break; + case tok::kw_pipe: +- if (!getLangOpts().OpenCL || (getLangOpts().OpenCLVersion < 200 && +- !getLangOpts().OpenCLCPlusPlus)) { ++ if (!getLangOpts().OpenCLPipeKeyword) { + // OpenCL 2.0 defined this keyword. OpenCL 1.2 and earlier should + // support the "pipe" word as identifier. 
+ Tok.getIdentifierInfo()->revertTokenIDToIdentifier(); +@@ -4027,8 +4026,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, + case tok::kw___generic: + // generic address space is introduced only in OpenCL v2.0 + // see OpenCL C Spec v2.0 s6.5.5 +- if (Actions.getLangOpts().OpenCLVersion < 200 && +- !Actions.getLangOpts().OpenCLCPlusPlus) { ++ if (!Actions.getLangOpts().OpenCLGenericKeyword) { + DiagID = diag::err_opencl_unknown_type_specifier; + PrevSpec = Tok.getIdentifierInfo()->getNameStart(); + isInvalid = true; +@@ -5050,8 +5048,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { + default: return false; + + case tok::kw_pipe: +- return (getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200) || +- getLangOpts().OpenCLCPlusPlus; ++ return getLangOpts().OpenCLPipeKeyword; + + case tok::identifier: // foo::bar + // Unfortunate hack to support "Class.factoryMethod" notation. +@@ -5557,8 +5554,7 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang, + if (Kind == tok::star || Kind == tok::caret) + return true; + +- if (Kind == tok::kw_pipe && +- ((Lang.OpenCL && Lang.OpenCLVersion >= 200) || Lang.OpenCLCPlusPlus)) ++ if (Kind == tok::kw_pipe && Lang.OpenCLPipeKeyword) + return true; + + if (!Lang.CPlusPlus) +diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp +index df411e1928d6..9a40ce888695 100644 +--- a/clang/lib/Parse/ParsePragma.cpp ++++ b/clang/lib/Parse/ParsePragma.cpp +@@ -697,12 +697,14 @@ void Parser::HandlePragmaOpenCLExtension() { + if (Name == "all") { + if (State == Disable) { + Opt.disableAll(); +- Opt.enableSupportedCore(getLangOpts()); ++ Opt.enableSupportedCore(); + } else { + PP.Diag(NameLoc, diag::warn_pragma_expected_predicate) << 1; + } ++ } else if (Opt.isFeature(Name)) { ++ PP.Diag(NameLoc, diag::warn_opencl_pragma_feature_ignore) << Ident; + } else if (State == Begin) { +- if (!Opt.isKnown(Name) || !Opt.isSupported(Name, getLangOpts())) { ++ if 
(!Opt.isKnown(Name) || !Opt.isSupported(Name)) { + Opt.support(Name); + } + Actions.setCurrentOpenCLExtension(Name); +@@ -712,9 +714,9 @@ void Parser::HandlePragmaOpenCLExtension() { + Actions.setCurrentOpenCLExtension(""); + } else if (!Opt.isKnown(Name)) + PP.Diag(NameLoc, diag::warn_pragma_unknown_extension) << Ident; +- else if (Opt.isSupportedExtension(Name, getLangOpts())) ++ else if (Opt.isSupportedExtension(Name)) + Opt.enable(Name, State == Enable); +- else if (Opt.isSupportedCore(Name, getLangOpts())) ++ else if (Opt.isSupportedCore(Name)) + PP.Diag(NameLoc, diag::warn_pragma_extension_is_core) << Ident; + else + PP.Diag(NameLoc, diag::warn_pragma_unsupported_extension) << Ident; +diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td +index 9d6bb411eff8..d352d35f1e46 100644 +--- a/clang/lib/Sema/OpenCLBuiltins.td ++++ b/clang/lib/Sema/OpenCLBuiltins.td +@@ -22,11 +22,13 @@ + class Version { + int ID = _Version; + } ++ + def CLAll : Version< 0>; + def CL10 : Version<100>; + def CL11 : Version<110>; + def CL12 : Version<120>; + def CL20 : Version<200>; ++def CL30 : Version<300>; + + // Address spaces + // Pointer types need to be assigned an address space. 
+@@ -65,6 +67,14 @@ def FuncExtKhrGlMsaaSharing : FunctionExtension<"cl_khr_gl_msaa_sha + // Multiple extensions + def FuncExtKhrMipmapAndWrite3d : FunctionExtension<"cl_khr_mipmap_image cl_khr_3d_image_writes">; + ++// Features ++def FuncExtGenericAddressSpace : FunctionExtension<"__opencl_c_generic_address_space">; ++def FuncExtWorkGroupCollective : FunctionExtension<"__opencl_c_work_group_collective_functions">; ++def FuncExtPipes : FunctionExtension<"__opencl_c_pipes">; ++def FuncExtDeviceSidEenqueue : FunctionExtension<"__opencl_c_device_enqueue">; ++def FuncNonExplicitAtomicFeatures : FunctionExtension<"__opencl_c_atomic_order_seq_cst __opencl_c_atomic_scope_device">; ++def FuncNonExplicitAtomicFeaturesAndGenericAS : FunctionExtension<"__opencl_c_atomic_order_seq_cst __opencl_c_atomic_scope_device __opencl_c_generic_address_space">; ++ + // Qualified Type. These map to ASTContext::QualType. + class QualType { + // Name of the field or function in a clang::ASTContext +@@ -230,13 +240,9 @@ class Builtin _Signature, list _Attributes = Attr. + bit IsConv = _Attributes[2]; + // OpenCL extensions to which the function belongs. + FunctionExtension Extension = FuncExtNone; +- // Version of OpenCL from which the function is available (e.g.: CL10). +- // MinVersion is inclusive. +- Version MinVersion = CL10; +- // Version of OpenCL from which the function is not supported anymore. +- // MaxVersion is exclusive. ++ // List of OpenCL version in which this function available. + // CLAll makes the function available for all versions. 
+- Version MaxVersion = CLAll; ++ list Versions = [CLAll]; + } + + //===----------------------------------------------------------------------===// +@@ -398,7 +404,7 @@ foreach name = ["get_global_size", "get_global_id", "get_local_size", + def : Builtin; + } + +-let MinVersion = CL20 in { ++let Versions = [CL20, CL30] in { + def : Builtin<"get_enqueued_local_size", [Size, UInt]>; + foreach name = ["get_global_linear_id", "get_local_linear_id"] in { + def : Builtin; +@@ -471,7 +477,7 @@ foreach name = ["fma", "mad"] in { + } + + // --- Version dependent --- +-let MaxVersion = CL20 in { ++let Versions = [CL10, CL11, CL12, CL30] in { + foreach AS = [GlobalAS, LocalAS, PrivateAS] in { + foreach name = ["fract", "modf", "sincos"] in { + def : Builtin]>; +@@ -488,7 +494,9 @@ let MaxVersion = CL20 in { + } + } + } +-let MinVersion = CL20 in { ++ ++let Versions = [CL20, CL30] in { ++ let Extension = FuncExtGenericAddressSpace in { + foreach name = ["fract", "modf", "sincos"] in { + def : Builtin]>; + } +@@ -501,6 +509,7 @@ let MinVersion = CL20 in { + def : Builtin]>; + } + } ++ } + } + + // --- Table 9 --- +@@ -531,7 +540,7 @@ foreach name = ["abs"] in { + foreach name = ["clz", "popcount"] in { + def : Builtin; + } +-let MinVersion = CL20 in { ++let Versions = [CL20, CL30] in { + foreach name = ["ctz"] in { + def : Builtin; + } +@@ -705,7 +714,7 @@ foreach name = ["select"] in { + // --- Table 15 --- + // Variants for OpenCL versions below 2.0, using pointers to the global, local + // and private address spaces. +-let MaxVersion = CL20 in { ++let Versions = [CL10, CL11, CL12, CL30] in { + foreach AS = [GlobalAS, LocalAS, PrivateAS] in { + foreach VSize = [2, 3, 4, 8, 16] in { + foreach name = ["vload" # VSize] in { +@@ -748,7 +757,8 @@ let MaxVersion = CL20 in { + } + // Variants for OpenCL versions above 2.0, using pointers to the generic + // address space. 
+-let MinVersion = CL20 in { ++let Versions = [CL20, CL30] in { ++ let Extension = FuncExtGenericAddressSpace in { + foreach VSize = [2, 3, 4, 8, 16] in { + foreach name = ["vload" # VSize] in { + def : Builtin, Size, PointerType, GenericAS>]>; +@@ -786,6 +796,7 @@ let MinVersion = CL20 in { + } + } + } ++ } + } + // Variants using pointers to the constant address space. + foreach VSize = [2, 3, 4, 8, 16] in { +@@ -812,7 +823,7 @@ foreach VSize = [2, 3, 4, 8, 16] in { + } + } + } +-let MaxVersion = CL20 in { ++let Versions = [CL10, CL11, CL12, CL30] in { + foreach AS = [GlobalAS, LocalAS, PrivateAS] in { + def : Builtin<"vload_half", [Float, Size, PointerType, AS>]>; + foreach VSize = [2, 3, 4, 8, 16] in { +@@ -832,7 +843,8 @@ let MaxVersion = CL20 in { + } + } + } +-let MinVersion = CL20 in { ++let Versions = [CL20, CL30] in { ++ let Extension = FuncExtGenericAddressSpace in { + foreach AS = [GenericAS] in { + def : Builtin<"vload_half", [Float, Size, PointerType, AS>]>; + foreach VSize = [2, 3, 4, 8, 16] in { +@@ -851,6 +863,7 @@ let MinVersion = CL20 in { + } + } + } ++ } + } + + foreach AS = [ConstantAS] in { +@@ -875,7 +888,9 @@ foreach name = ["async_work_group_strided_copy"] in { + def : Builtin, PointerType, LocalAS>, Size, Size, Event]>; + } + foreach name = ["wait_group_events"] in { ++ let Extension = FuncExtGenericAddressSpace in { + def : Builtin]>; ++ } + } + foreach name = ["prefetch"] in { + def : Builtin, GlobalAS>, Size]>; +@@ -1154,7 +1169,8 @@ foreach aQual = ["WO", "RW"] in { + //-------------------------------------------------------------------- + // OpenCL v2.0 s6.13.15 - Work-group Functions + // --- Table 26 --- +-let MinVersion = CL20 in { ++let Versions = [CL20, CL30] in { ++ let Extension = FuncExtWorkGroupCollective in { + foreach name = ["work_group_all", "work_group_any"] in { + def : Builtin; + } +@@ -1169,11 +1185,12 @@ let MinVersion = CL20 in { + def : Builtin; + } + } ++ } + } + + + // OpenCL v2.0 s9.17.3: Additions to section 
6.13.1: Work-Item Functions +-let MinVersion = CL20 in { ++let Versions = [CL20] in { + let Extension = FuncExtKhrSubgroups in { + def get_sub_group_size : Builtin<"get_sub_group_size", [UInt]>; + def get_max_sub_group_size : Builtin<"get_max_sub_group_size", [UInt]>; +diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp +index 9cfce5a63b1d..c26f45f62668 100644 +--- a/clang/lib/Sema/Sema.cpp ++++ b/clang/lib/Sema/Sema.cpp +@@ -286,9 +286,10 @@ void Sema::Initialize() { + // Initialize predefined OpenCL types and supported extensions and (optional) + // core features. + if (getLangOpts().OpenCL) { ++ getOpenCLOptions().setOpenCLVersion(getLangOpts()); + getOpenCLOptions().addSupport( + Context.getTargetInfo().getSupportedOpenCLOpts()); +- getOpenCLOptions().enableSupportedCore(getLangOpts()); ++ getOpenCLOptions().enableSupportedCore(); + addImplicitTypedef("sampler_t", Context.OCLSamplerTy); + addImplicitTypedef("event_t", Context.OCLEventTy); + if (getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) { +@@ -344,12 +345,18 @@ void Sema::Initialize() { + "cl_khr_int64_base_atomics cl_khr_int64_extended_atomics"); + + setOpenCLExtensionForType(AtomicDoubleT, "cl_khr_fp64"); ++ setOpenCLExtensionForType(Context.OCLReserveIDTy, "__opencl_c_pipes"); ++ setOpenCLExtensionForType(Context.OCLClkEventTy, ++ "__opencl_c_device_enqueue"); ++ setOpenCLExtensionForType(Context.OCLQueueTy, ++ "__opencl_c_device_enqueue"); + } + + setOpenCLExtensionForType(Context.DoubleTy, "cl_khr_fp64"); + +-#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \ +- setOpenCLExtensionForType(Context.Id, Ext); ++#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \ ++ setOpenCLExtensionForType(Context.Id, Ext); \ ++ setOpenCLExtensionForType(Context.Id, "__opencl_c_images"); + #include "clang/Basic/OpenCLImageTypes.def" + #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + addImplicitTypedef(#ExtType, Context.Id##Ty); \ +@@ -2293,6 +2300,27 @@ bool Sema::isOpenCLDisabledDecl(Decl *FD) { + 
return false; + } + ++template ++void Sema::DiagnoseOpenCLRequiresOption(llvm::StringRef OpenCLOptName, ++ DiagLocT DiagLoc, DiagInfoT DiagInfo, ++ unsigned Selector, ++ SourceRange SrcRange) { ++ const auto &LO = getLangOpts(); ++ auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; ++ // For versions higher that 3.0 diagnosing feature ++ if (CLVer >= 300) { ++ OpenCLOptName = ++ llvm::StringSwitch(OpenCLOptName) ++ .Case("cl_khr_3d_image_writes", "__opencl_c_3d_image_writes") ++ .Case("cl_khr_subgroups", "__opencl_c_subgroups") ++ .Case("cl_khr_fp64", "__opencl_c_fp64") ++ .Default(OpenCLOptName); ++ } ++ ++ Diag(DiagLoc, diag::err_opencl_requires_extension) ++ << Selector << DiagInfo << OpenCLOptName << SrcRange; ++} ++ + template + bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc, + DiagInfoT DiagInfo, MapT &Map, +@@ -2304,8 +2332,7 @@ bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc, + bool Disabled = false; + for (auto &I : Loc->second) { + if (I != CurrOpenCLExtension && !getOpenCLOptions().isEnabled(I)) { +- Diag(DiagLoc, diag::err_opencl_requires_extension) << Selector << DiagInfo +- << I << SrcRange; ++ DiagnoseOpenCLRequiresOption(I, DiagLoc, DiagInfo, Selector, SrcRange); + Disabled = true; + } + } +@@ -2341,3 +2368,13 @@ bool Sema::checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E) { + return checkOpenCLDisabledTypeOrDecl(&D, E.getBeginLoc(), FnName, + OpenCLDeclExtMap, 1, D.getSourceRange()); + } ++ ++bool Sema::checkOpenCLSubgroupExtForCallExpr(CallExpr *Call) { ++ if (!getOpenCLOptions().isEnabled("cl_khr_subgroups")) { ++ DiagnoseOpenCLRequiresOption("cl_khr_subgroups", Call->getBeginLoc(), ++ Call->getDirectCallee(), 1, ++ Call->getSourceRange()); ++ return true; ++ } ++ return false; ++} +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index 74742023d1b3..efa3b6ab0eb6 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -597,20 +597,11 @@ 
static bool checkOpenCLBlockArgs(Sema &S, Expr *BlockArg) { + return IllegalParams; + } + +-static bool checkOpenCLSubgroupExt(Sema &S, CallExpr *Call) { +- if (!S.getOpenCLOptions().isEnabled("cl_khr_subgroups")) { +- S.Diag(Call->getBeginLoc(), diag::err_opencl_requires_extension) +- << 1 << Call->getDirectCallee() << "cl_khr_subgroups"; +- return true; +- } +- return false; +-} +- + static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) { + if (checkArgCount(S, TheCall, 2)) + return true; + +- if (checkOpenCLSubgroupExt(S, TheCall)) ++ if (S.checkOpenCLSubgroupExtForCallExpr(TheCall)) + return true; + + // First argument is an ndrange_t type. +@@ -1564,7 +1555,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, + break; + case Builtin::BIsub_group_reserve_read_pipe: + case Builtin::BIsub_group_reserve_write_pipe: +- if (checkOpenCLSubgroupExt(*this, TheCall) || ++ if (checkOpenCLSubgroupExtForCallExpr(TheCall) || + SemaBuiltinReserveRWPipe(*this, TheCall)) + return ExprError(); + break; +@@ -1577,7 +1568,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, + break; + case Builtin::BIsub_group_commit_read_pipe: + case Builtin::BIsub_group_commit_write_pipe: +- if (checkOpenCLSubgroupExt(*this, TheCall) || ++ if (checkOpenCLSubgroupExtForCallExpr(TheCall) || + SemaBuiltinCommitRWPipe(*this, TheCall)) + return ExprError(); + break; +@@ -4314,6 +4305,20 @@ DiagnoseCStringFormatDirectiveInCFAPI(Sema &S, + } + } + ++bool Sema::isSupportedOpenCLOMemoryOrdering(int64_t Ordering) const { ++ assert(llvm::isValidAtomicOrderingCABI(Ordering)); ++ auto OrderingCABI = (llvm::AtomicOrderingCABI)Ordering; ++ switch (OrderingCABI) { ++ case llvm::AtomicOrderingCABI::acquire: ++ case llvm::AtomicOrderingCABI::release: ++ case llvm::AtomicOrderingCABI::acq_rel: ++ return OpenCLFeatures.isEnabled("__opencl_c_atomic_order_acq_rel"); ++ ++ default: ++ return true; ++ } ++} ++ + /// Determine whether the given type has 
a non-null nullability annotation. + static bool isNonNullType(ASTContext &ctx, QualType type) { + if (auto nullability = type->getNullability(ctx)) +@@ -5067,10 +5072,17 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, + if (SubExprs.size() >= 2 && Form != Init) { + llvm::APSInt Result(32); + if (SubExprs[1]->isIntegerConstantExpr(Result, Context) && +- !isValidOrderingForOp(Result.getSExtValue(), Op)) ++ !isValidOrderingForOp(Result.getSExtValue(), Op)) { + Diag(SubExprs[1]->getBeginLoc(), + diag::warn_atomic_op_has_invalid_memory_order) + << SubExprs[1]->getSourceRange(); ++ } else if (IsOpenCL && ++ !isSupportedOpenCLOMemoryOrdering(Result.getSExtValue())) { ++ Diag(SubExprs[1]->getBeginLoc(), ++ diag::err_opencl_memory_ordering_require_feat) ++ << SubExprs[1]->getSourceRange(); ++ return ExprError(); ++ } + } + + if (auto ScopeModel = AtomicExpr::getScopeModel(Op)) { +diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp +index 64146f4a912f..c1e629b5dc76 100644 +--- a/clang/lib/Sema/SemaDecl.cpp ++++ b/clang/lib/Sema/SemaDecl.cpp +@@ -6152,7 +6152,9 @@ void Sema::deduceOpenCLAddressSpace(ValueDecl *Decl) { + if (Type->isSamplerT() || Type->isVoidType()) + return; + LangAS ImplAS = LangAS::opencl_private; +- if ((getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) && ++ if ((getLangOpts().OpenCLCPlusPlus || ++ getOpenCLOptions().isEnabled( ++ "__opencl_c_program_scope_global_variables")) && + Var->hasGlobalStorage()) + ImplAS = LangAS::opencl_global; + // If the original type from a decayed type is an array type and that array +@@ -7682,6 +7684,10 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { + // OpenCL C v2.0 s6.5.1 - Variables defined at program scope and static + // variables inside a function can also be declared in the global + // address space. 
++ // OpenCL C v3.0 s5.5 - For OpenCL C 2.0, or with the ++ // __opencl_c_program_scope_global_variables feature macro, the ++ // address space for a variable at program scope or a static variable ++ // also be __global + // C++ for OpenCL inherits rule from OpenCL C v2.0. + // FIXME: Adding local AS in C++ for OpenCL might make sense. + if (NewVD->isFileVarDecl() || NewVD->isStaticLocal() || +@@ -7689,10 +7695,11 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { + if (!T->isSamplerT() && + !(T.getAddressSpace() == LangAS::opencl_constant || + (T.getAddressSpace() == LangAS::opencl_global && +- (getLangOpts().OpenCLVersion == 200 || +- getLangOpts().OpenCLCPlusPlus)))) { ++ (OpenCLFeatures.isEnabled( ++ "__opencl_c_program_scope_global_variables"))))) { + int Scope = NewVD->isStaticLocal() | NewVD->hasExternalStorage() << 1; +- if (getLangOpts().OpenCLVersion == 200 || getLangOpts().OpenCLCPlusPlus) ++ if (OpenCLFeatures.isEnabled( ++ "__opencl_c_program_scope_global_variables")) + Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space) + << Scope << "global or constant"; + else +diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp +index 849bc09063b3..c963b95b131a 100644 +--- a/clang/lib/Sema/SemaDeclAttr.cpp ++++ b/clang/lib/Sema/SemaDeclAttr.cpp +@@ -6362,7 +6362,7 @@ static void handleInternalLinkageAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + } + + static void handleOpenCLNoSVMAttr(Sema &S, Decl *D, const ParsedAttr &AL) { +- if (S.LangOpts.OpenCLVersion != 200) ++ if (S.LangOpts.OpenCLVersion < 200) + S.Diag(AL.getLoc(), diag::err_attribute_requires_opencl_version) + << AL << "2.0" << 0; + else +@@ -6446,6 +6446,13 @@ static void handleOpenCLAccessAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + << AL << PDecl->getType() << DeclTy->isImageType(); + D->setInvalidDecl(true); + return; ++ } else if ((!S.getLangOpts().OpenCLCPlusPlus && ++ S.getLangOpts().OpenCLVersion >= 200) && ++ 
!S.getOpenCLOptions().isEnabled( ++ "__opencl_c_read_write_images")) { ++ S.Diag(AL.getLoc(), diag::err_opencl_requires_extension) ++ << 0 << PDecl->getType() << "__opencl_c_read_write_images"; ++ return; + } + } + } +diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp +index 831e55046e80..4481a59b4517 100644 +--- a/clang/lib/Sema/SemaDeclCXX.cpp ++++ b/clang/lib/Sema/SemaDeclCXX.cpp +@@ -14906,6 +14906,11 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl, + if (auto *PtrTy = ResultType->getAs()) { + ResultType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy); + } ++ if (CanQual ExpectedPtrTy = ++ ExpectedResultType->getAs()) { ++ ExpectedResultType = SemaRef.Context.getCanonicalType( ++ RemoveAddressSpaceFromPtr(SemaRef, ExpectedPtrTy->getTypePtr())); ++ } + } + + // Check that the result type is what we expect. +@@ -14939,6 +14944,11 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl, + FnDecl->getParamDecl(0)->getType()->getAs()) { + FirstParamType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy); + } ++ if (CanQual ExpectedPtrTy = ++ ExpectedFirstParamType->getAs()) { ++ ExpectedFirstParamType = SemaRef.Context.getCanonicalType( ++ RemoveAddressSpaceFromPtr(SemaRef, ExpectedPtrTy->getTypePtr())); ++ } + } + if (SemaRef.Context.getCanonicalType(FirstParamType).getUnqualifiedType() != + ExpectedFirstParamType) +diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp +index 8d96404a5c27..8f21203b9508 100644 +--- a/clang/lib/Sema/SemaLookup.cpp ++++ b/clang/lib/Sema/SemaLookup.cpp +@@ -771,19 +771,20 @@ static void InsertOCLBuiltinDeclarationsFromTable(Sema &S, LookupResult &LR, + // as argument. Only meaningful for generic types, otherwise equals 1. + unsigned GenTypeMaxCnt; + ++ ASTContext &Context = S.Context; ++ ++ // Ignore this BIF if its version does not match the language options. 
++ unsigned OpenCLVersion = Context.getLangOpts().OpenCLVersion; ++ if (Context.getLangOpts().OpenCLCPlusPlus) ++ OpenCLVersion = 200; ++ ++ unsigned short VersionCode = OpenCLOptions::EncodeVersion(OpenCLVersion); ++ + for (unsigned SignatureIndex = 0; SignatureIndex < Len; SignatureIndex++) { + const OpenCLBuiltinStruct &OpenCLBuiltin = + BuiltinTable[FctIndex + SignatureIndex]; +- ASTContext &Context = S.Context; + +- // Ignore this BIF if its version does not match the language options. +- unsigned OpenCLVersion = Context.getLangOpts().OpenCLVersion; +- if (Context.getLangOpts().OpenCLCPlusPlus) +- OpenCLVersion = 200; +- if (OpenCLVersion < OpenCLBuiltin.MinVersion) +- continue; +- if ((OpenCLBuiltin.MaxVersion != 0) && +- (OpenCLVersion >= OpenCLBuiltin.MaxVersion)) ++ if (!(OpenCLBuiltin.AllVersions & VersionCode)) + continue; + + SmallVector RetTypes; +diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp +index 93ddd047e09b..93211b801f8d 100644 +--- a/clang/lib/Sema/SemaType.cpp ++++ b/clang/lib/Sema/SemaType.cpp +@@ -2016,7 +2016,7 @@ static QualType deduceOpenCLPointeeAddrSpace(Sema &S, QualType PointeeType) { + !PointeeType.hasAddressSpace()) + PointeeType = S.getASTContext().getAddrSpaceQualType( + PointeeType, +- S.getLangOpts().OpenCLCPlusPlus || S.getLangOpts().OpenCLVersion == 200 ++ S.getOpenCLOptions().isEnabled("__opencl_c_generic_address_space") + ? 
LangAS::opencl_generic + : LangAS::opencl_private); + return PointeeType; +@@ -5160,9 +5160,15 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, + } + + case DeclaratorChunk::Pipe: { +- T = S.BuildReadPipeType(T, DeclType.Loc); +- processTypeAttrs(state, T, TAL_DeclSpec, +- D.getMutableDeclSpec().getAttributes()); ++ if (S.getOpenCLOptions().isEnabled("__opencl_c_pipes")) { ++ T = S.BuildReadPipeType(T, DeclType.Loc); ++ processTypeAttrs(state, T, TAL_DeclSpec, ++ D.getMutableDeclSpec().getAttributes()); ++ } else { ++ D.setInvalidType(true); ++ T = Context.IntTy; ++ S.Diag(D.getIdentifierLoc(), diag::err_opencl_pipes_require_feat); ++ } + break; + } + } +diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +index cdbf28bbcad8..0bedff5ef0f3 100644 +--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl ++++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +@@ -2,7 +2,8 @@ + // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s + // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s + // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s +- ++// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s ++// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -cl-ext=__opencl_c_program_scope_global_variables -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s + typedef int int2 __attribute__((ext_vector_type(2))); + + typedef struct { +@@ -39,7 +40,7 @@ struct LargeStructTwoMember { + int2 y[20]; + }; + +-#if __OPENCL_C_VERSION__ >= 200 ++#ifdef __opencl_c_program_scope_global_variables + struct 
LargeStructOneMember g_s; + #endif + +@@ -98,7 +99,7 @@ void FuncOneLargeMember(struct LargeStructOneMember u) { + // AMDGCN20: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)* + // AMDGCN20: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false) + // AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]]) +-#if __OPENCL_C_VERSION__ >= 200 ++#ifdef __opencl_c_program_scope_global_variables + void test_indirect_arg_globl(void) { + FuncOneLargeMember(g_s); + } +diff --git a/clang/test/CodeGenOpenCL/address-spaces-conversions.cl b/clang/test/CodeGenOpenCL/address-spaces-conversions.cl +index 52feccc540bb..9ecffcca5ee9 100644 +--- a/clang/test/CodeGenOpenCL/address-spaces-conversions.cl ++++ b/clang/test/CodeGenOpenCL/address-spaces-conversions.cl +@@ -1,5 +1,7 @@ + // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -emit-llvm -o - | FileCheck %s + // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s ++// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s + // When -ffake-address-space-map is not used, all addr space mapped to 0 for x86_64. 
+ + // test that we generate address space casts everywhere we need conversions of +diff --git a/clang/test/CodeGenOpenCL/address-spaces-mangling.cl b/clang/test/CodeGenOpenCL/address-spaces-mangling.cl +index 50622f099143..e19ec8451d0d 100644 +--- a/clang/test/CodeGenOpenCL/address-spaces-mangling.cl ++++ b/clang/test/CodeGenOpenCL/address-spaces-mangling.cl +@@ -6,6 +6,7 @@ + // We check that the address spaces are mangled the same in both version of OpenCL + // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s + // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s + + // We can't name this f as private is equivalent to default + // no specifier given address space so we get multiple definition +@@ -47,7 +48,7 @@ void f(constant int *arg) { } + // OCL-20-DAG: @_Z1fPU3AS2i + // OCL-12-DAG: @_Z1fPU3AS2i + +-#if __OPENCL_C_VERSION__ >= 200 ++#if __OPENCL_C_VERSION__ == 200 + __attribute__((overloadable)) + void f(generic int *arg) { } + // ASMANG20: @_Z1fPU3AS4i +diff --git a/clang/test/CodeGenOpenCL/address-spaces.cl b/clang/test/CodeGenOpenCL/address-spaces.cl +index 3c8fea2a80bc..26a741338b3a 100644 +--- a/clang/test/CodeGenOpenCL/address-spaces.cl ++++ b/clang/test/CodeGenOpenCL/address-spaces.cl +@@ -1,9 +1,13 @@ + // RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR ++// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR + // RUN: %clang_cc1 %s -O0 -DCL20 -cl-std=CL2.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR + // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s ++// RUN: %clang_cc1 %s 
-O0 -cl-std=CL3.0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s + // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20AMDGCN + // RUN: %clang_cc1 %s -O0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s ++// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s + // RUN: %clang_cc1 %s -O0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s ++// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s + + // SPIR: %struct.S = type { i32, i32, i32* } + // CL20SPIR: %struct.S = type { i32, i32, i32 addrspace(4)* } +diff --git a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl +index 7216cb517495..8d3b30fe8074 100644 +--- a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl ++++ b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s ++// RUN: %clang_cc1 -O0 -cl-std=CL3.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s + // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL20 %s + + // CL12-LABEL: define void @func1(i32 addrspace(5)* %x) +diff --git a/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl b/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl +index a5d438933fa4..8073c7756eb6 100644 +--- a/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl ++++ b/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl +@@ -4,6 +4,17 @@ + // RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s + // RUN: %clang_cc1 -triple amdgcn---amdgizcl 
-cl-std=CL1.2 %s -emit-llvm -o - | FileCheck %s + // RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple r600 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---opencl -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple r600 -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---opencl -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s + + #ifdef __AMDGCN__ + #define PTSIZE 8 +@@ -11,7 +22,7 @@ + #define PTSIZE 4 + #endif + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #pragma OPENCL EXTENSION cl_khr_fp64 : enable + #endif + #ifdef cl_khr_fp16 +@@ -59,8 +70,12 @@ void test() { + check(__alignof__(double) == 8); + #endif + +- check(sizeof(void*) == (__OPENCL_C_VERSION__ >= 200 ? 8 : 4)); +- check(__alignof__(void*) == (__OPENCL_C_VERSION__ >= 200 ? 
8 : 4)); ++ check(sizeof(private void*) == 4); ++ check(__alignof__(private void*) == 4); ++#ifdef __opencl_c_generic_address_space ++ check(sizeof(generic void*) == 8); ++ check(__alignof__(generic void*) == 8); ++#endif + check(sizeof(global_ptr_t) == PTSIZE); + check(__alignof__(global_ptr_t) == PTSIZE); + check(sizeof(constant_ptr_t) == PTSIZE); +diff --git a/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl b/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl +index d1ab6aceac5c..70c5bace023b 100644 +--- a/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl ++++ b/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL1.2 -emit-llvm -o - -O0 | FileCheck %s ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL3.0 -emit-llvm -o - -O0 | FileCheck %s + + #pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable + void test_int8(uchar4 ua, uchar4 ub, char4 sa, char4 sb) { +diff --git a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl +index 76ace5dca21e..5dc43e222f75 100644 +--- a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl ++++ b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl +@@ -1,6 +1,8 @@ + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM ++// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM ++// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM + + kernel void ker() {}; + // CHECK: define{{.*}}@ker() 
#0 +diff --git a/clang/test/CodeGenOpenCL/fpmath.cl b/clang/test/CodeGenOpenCL/fpmath.cl +index 0108d909c94e..b28392739c71 100644 +--- a/clang/test/CodeGenOpenCL/fpmath.cl ++++ b/clang/test/CodeGenOpenCL/fpmath.cl +@@ -2,6 +2,8 @@ + // RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s + // RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s + // RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL1.2 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s ++// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL3.0 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s ++// RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s + + typedef __attribute__(( ext_vector_type(4) )) float float4; + +diff --git a/clang/test/CodeGenOpenCL/generic-address-space-feature.cl b/clang/test/CodeGenOpenCL/generic-address-space-feature.cl +new file mode 100644 +index 000000000000..890860abe4d9 +--- /dev/null ++++ b/clang/test/CodeGenOpenCL/generic-address-space-feature.cl +@@ -0,0 +1,28 @@ ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL12 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL12 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL20 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins 
-emit-llvm -o - | FileCheck %s --check-prefixes=CL20 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL30 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,__opencl_c_generic_address_space -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL30-GENERIC ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL30 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,__opencl_c_generic_address_space -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL30-GENERIC ++ ++void test(global float* src1, local float *src2, private float *src3, float *src4, float tmp) { ++ // CL20: %{{.+}} = addrspacecast float addrspace(1)* %{{.+}} to float addrspace(4)* ++ // CL12-NOT: addrspacecast ++ // CL30-NOT: addrspacecast ++ // CL30-GENERIC-NOT: addrspacecast ++ tmp = sincos(tmp, src1); ++ // CL20: %{{.+}} = addrspacecast float addrspace(3)* %{{.+}} to float addrspace(4)* ++ // CL12-NOT: addrspacecast ++ // CL30-NOT: addrspacecast ++ // CL30-GENERIC-NOT: addrspacecast ++ tmp = sincos(tmp, src2); ++ ++ // CL12: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float* {{.+}}) ++ // CL20: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float addrspace(4)* {{.+}}) ++ // CL30: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float* {{.+}}) ++ // CL30-GENERIC: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float addrspace(4)* {{.+}}) ++ // CHECK: addrspacecast ++ tmp = sincos(tmp, src4); ++} +diff --git a/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl b/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl +index 
515f13f6e768..5aa31ac6f345 100644 +--- a/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl ++++ b/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s + + // CHECK: %opencl.intel_sub_group_avc_mce_payload_t = type opaque + // CHECK: %opencl.intel_sub_group_avc_ime_payload_t = type opaque +diff --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl +index e89237623478..3d6708ac361f 100644 +--- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl ++++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl +@@ -1,5 +1,8 @@ + // RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s + // RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s ++ + // Test that the kernels always use the SPIR calling convention + // to have unambiguous mapping of arguments to feasibly implement + // clSetKernelArg(). 
+diff --git a/clang/test/CodeGenOpenCL/logical-ops.cl b/clang/test/CodeGenOpenCL/logical-ops.cl +index f083a8580ee7..499eab65039b 100644 +--- a/clang/test/CodeGenOpenCL/logical-ops.cl ++++ b/clang/test/CodeGenOpenCL/logical-ops.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s ++// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s + // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=clc++ -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s + + #pragma OPENCL EXTENSION cl_khr_fp64 : enable +diff --git a/clang/test/CodeGenOpenCL/no-half.cl b/clang/test/CodeGenOpenCL/no-half.cl +index aee8f678f01a..46da7fa339e8 100644 +--- a/clang/test/CodeGenOpenCL/no-half.cl ++++ b/clang/test/CodeGenOpenCL/no-half.cl +@@ -1,6 +1,7 @@ + // RUN: %clang_cc1 %s -cl-std=cl2.0 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s + // RUN: %clang_cc1 %s -cl-std=cl1.2 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s + // RUN: %clang_cc1 %s -cl-std=cl1.1 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s ++// RUN: %clang_cc1 %s -cl-std=cl3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s + + #pragma OPENCL EXTENSION cl_khr_fp64:enable + +diff --git a/clang/test/CodeGenOpenCL/pipe_builtin.cl b/clang/test/CodeGenOpenCL/pipe_builtin.cl +index 02b9669b7ab1..0bf35c336199 100644 +--- a/clang/test/CodeGenOpenCL/pipe_builtin.cl ++++ b/clang/test/CodeGenOpenCL/pipe_builtin.cl +@@ -1,4 +1,7 @@ + // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=clc++ -o - %s | FileCheck %s ++// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=cl2.0 -o - %s | FileCheck %s ++// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=cl3.0 -cl-ext=__opencl_c_pipes,__opencl_c_subgroups -o - 
%s | FileCheck %s ++ + // FIXME: Add MS ABI manglings of OpenCL things and remove %itanium_abi_triple + // above to support OpenCL in the MS C++ ABI. + +diff --git a/clang/test/CodeGenOpenCL/pipe_types.cl b/clang/test/CodeGenOpenCL/pipe_types.cl +index ba064c6d7557..b7a523d4f084 100644 +--- a/clang/test/CodeGenOpenCL/pipe_types.cl ++++ b/clang/test/CodeGenOpenCL/pipe_types.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s ++// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -o - %s | FileCheck %s + + // CHECK: %opencl.pipe_ro_t = type opaque + // CHECK: %opencl.pipe_wo_t = type opaque +diff --git a/clang/test/CodeGenOpenCL/printf.cl b/clang/test/CodeGenOpenCL/printf.cl +index fc139d776db6..0133c5595d81 100644 +--- a/clang/test/CodeGenOpenCL/printf.cl ++++ b/clang/test/CodeGenOpenCL/printf.cl +@@ -1,5 +1,7 @@ + // RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-+cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s + // RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s ++// RUN: %clang_cc1 -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s ++// RUN: %clang_cc1 -cl-std=CL3.0 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s + + typedef __attribute__((ext_vector_type(2))) float float2; + typedef __attribute__((ext_vector_type(2))) half half2; +diff --git a/clang/test/CodeGenOpenCL/unroll-hint.cl b/clang/test/CodeGenOpenCL/unroll-hint.cl +index 0f84450a1ae6..9347c935869b 100644 +--- a/clang/test/CodeGenOpenCL/unroll-hint.cl ++++ b/clang/test/CodeGenOpenCL/unroll-hint.cl +@@ -1,5 +1,6 @@ + // RUN: %clang_cc1 -emit-llvm 
-O0 -cl-std=CL2.0 -o - %s | FileCheck %s + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s | FileCheck %s ++// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -o - %s | FileCheck %s + + /*** for ***/ + void for_count() +diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c +index 5c0bfb69f9a3..eabdf0b5938d 100644 +--- a/clang/test/Driver/autocomplete.c ++++ b/clang/test/Driver/autocomplete.c +@@ -43,6 +43,8 @@ + // CLSTDALL-NEXT: CL1.2 + // CLSTDALL-NEXT: cl2.0 + // CLSTDALL-NEXT: CL2.0 ++// CLSTDALL-NEXT: cl3.0 ++// CLSTDALL-NEXT: CL3.0 + // CLSTDALL-NEXT: clc++ + // CLSTDALL-NEXT: CLC++ + // RUN: %clang --autocomplete=-fno-sanitize-coverage=,f | FileCheck %s -check-prefix=FNOSANICOVER +diff --git a/clang/test/Driver/opencl.cl b/clang/test/Driver/opencl.cl +index 63b04bc1af41..ffdd4f7d65bb 100644 +--- a/clang/test/Driver/opencl.cl ++++ b/clang/test/Driver/opencl.cl +@@ -2,6 +2,7 @@ + // RUN: %clang -S -### -cl-std=CL1.1 %s 2>&1 | FileCheck --check-prefix=CHECK-CL11 %s + // RUN: %clang -S -### -cl-std=CL1.2 %s 2>&1 | FileCheck --check-prefix=CHECK-CL12 %s + // RUN: %clang -S -### -cl-std=CL2.0 %s 2>&1 | FileCheck --check-prefix=CHECK-CL20 %s ++// RUN: %clang -S -### -cl-std=CL3.0 %s 2>&1 | FileCheck --check-prefix=CHECK-CL30 %s + // RUN: %clang -S -### -cl-std=clc++ %s 2>&1 | FileCheck --check-prefix=CHECK-CLCPP %s + // RUN: %clang -S -### -cl-opt-disable %s 2>&1 | FileCheck --check-prefix=CHECK-OPT-DISABLE %s + // RUN: %clang -S -### -cl-strict-aliasing %s 2>&1 | FileCheck --check-prefix=CHECK-STRICT-ALIASING %s +@@ -22,6 +23,7 @@ + // CHECK-CL11: "-cc1" {{.*}} "-cl-std=CL1.1" + // CHECK-CL12: "-cc1" {{.*}} "-cl-std=CL1.2" + // CHECK-CL20: "-cc1" {{.*}} "-cl-std=CL2.0" ++// CHECK-CL30: "-cc1" {{.*}} "-cl-std=CL3.0" + // CHECK-CLCPP: "-cc1" {{.*}} "-cl-std=clc++" + // CHECK-OPT-DISABLE: "-cc1" {{.*}} "-cl-opt-disable" + // CHECK-STRICT-ALIASING: "-cc1" {{.*}} "-cl-strict-aliasing" +diff --git a/clang/test/Driver/unknown-std.cl 
b/clang/test/Driver/unknown-std.cl +index 6f371bac13ac..00209fb62556 100644 +--- a/clang/test/Driver/unknown-std.cl ++++ b/clang/test/Driver/unknown-std.cl +@@ -10,6 +10,7 @@ + // CHECK-NEXT: note: use 'cl1.1' for 'OpenCL 1.1' standard + // CHECK-NEXT: note: use 'cl1.2' for 'OpenCL 1.2' standard + // CHECK-NEXT: note: use 'cl2.0' for 'OpenCL 2.0' standard ++// CHECK-NEXT: note: use 'cl3.0' for 'OpenCL 3.0' standard + // CHECK-NEXT: note: use 'clc++' for 'C++ for OpenCL' standard + + // Make sure that no other output is present. +diff --git a/clang/test/Frontend/stdlang.c b/clang/test/Frontend/stdlang.c +index 51484999e37a..eac4632fbdd6 100644 +--- a/clang/test/Frontend/stdlang.c ++++ b/clang/test/Frontend/stdlang.c +@@ -9,6 +9,7 @@ + // RUN: %clang_cc1 -x cl -cl-std=CL1.1 -DOPENCL %s + // RUN: %clang_cc1 -x cl -cl-std=CL1.2 -DOPENCL %s + // RUN: %clang_cc1 -x cl -cl-std=CL2.0 -DOPENCL %s ++// RUN: %clang_cc1 -x cl -cl-std=CL3.0 -DOPENCL %s + // RUN: %clang_cc1 -x cl -cl-std=CLC++ -DOPENCL %s + // RUN: not %clang_cc1 -x cl -std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s + // RUN: not %clang_cc1 -x cl -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s +diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl +index 1b151ffdd16a..2716076acdcf 100644 +--- a/clang/test/Headers/opencl-c-header.cl ++++ b/clang/test/Headers/opencl-c-header.cl +@@ -1,6 +1,7 @@ + // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify | FileCheck %s + // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.1 | FileCheck %s + // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.2 | FileCheck %s ++// RUN: %clang_cc1 -O0 -triple spir-unknown-unknown 
-internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL3.0 | FileCheck %s + // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=clc++ | FileCheck %s --check-prefix=CHECK20 + + // Test including the default header as a module. +@@ -39,9 +40,11 @@ + // RUN: rm -rf %t + // RUN: mkdir -p %t + // RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL1.2 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s ++// RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL3.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s + // RUN: %clang_cc1 -triple amdgcn--amdhsa -O0 -emit-llvm -o - -cl-std=CL2.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK20 --check-prefix=CHECK-MOD %s + // RUN: chmod u-w %t + // RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL1.2 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s ++// RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL3.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s + // RUN: %clang_cc1 -triple amdgcn--amdhsa -O0 -emit-llvm -o - -cl-std=CL2.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK20 --check-prefix=CHECK-MOD %s + // RUN: chmod u+w %t + +@@ -67,7 +70,7 @@ char f(char x) { + // from OpenCL 2.0 
onwards. + + // CHECK20: _Z12write_imagef14ocl_image3d_wo +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) + void test_image3dwo(write_only image3d_t img) { + write_imagef(img, (0), (0.0f)); + } +@@ -81,7 +84,7 @@ void test_atomics(__generic volatile unsigned int* a) { + #endif + + // Verify that ATOMIC_VAR_INIT is defined. +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) + global atomic_int z = ATOMIC_VAR_INIT(99); + #endif //__OPENCL_C_VERSION__ + +diff --git a/clang/test/Index/pipe-size.cl b/clang/test/Index/pipe-size.cl +index 94a1255f0a48..59b76051eda1 100644 +--- a/clang/test/Index/pipe-size.cl ++++ b/clang/test/Index/pipe-size.cl +@@ -2,6 +2,13 @@ + // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR + // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64 + // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN ++// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple x86_64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=X86 ++// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR ++// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64 ++// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN ++ ++ ++ + __kernel void testPipe( pipe int test ) + { + int s = sizeof(test); +diff --git 
a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c +index def105f4c52e..b088a37ba665 100644 +--- a/clang/test/Preprocessor/predefined-macros.c ++++ b/clang/test/Preprocessor/predefined-macros.c +@@ -129,6 +129,8 @@ + // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL12 + // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=CL2.0 \ + // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL20 ++// RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=CL3.0 \ ++// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL30 + // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-fast-relaxed-math \ + // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FRM + // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=clc++ \ +@@ -137,26 +139,37 @@ + // CHECK-CL10: #define CL_VERSION_1_1 110 + // CHECK-CL10: #define CL_VERSION_1_2 120 + // CHECK-CL10: #define CL_VERSION_2_0 200 ++// CHECK-CL10: #define CL_VERSION_3_0 300 + // CHECK-CL10: #define __OPENCL_C_VERSION__ 100 + // CHECK-CL10-NOT: #define __FAST_RELAXED_MATH__ 1 + // CHECK-CL11: #define CL_VERSION_1_0 100 + // CHECK-CL11: #define CL_VERSION_1_1 110 + // CHECK-CL11: #define CL_VERSION_1_2 120 + // CHECK-CL11: #define CL_VERSION_2_0 200 ++// CHECK-CL11: #define CL_VERSION_3_0 300 + // CHECK-CL11: #define __OPENCL_C_VERSION__ 110 + // CHECK-CL11-NOT: #define __FAST_RELAXED_MATH__ 1 + // CHECK-CL12: #define CL_VERSION_1_0 100 + // CHECK-CL12: #define CL_VERSION_1_1 110 + // CHECK-CL12: #define CL_VERSION_1_2 120 + // CHECK-CL12: #define CL_VERSION_2_0 200 ++// CHECK-CL12: #define CL_VERSION_3_0 300 + // CHECK-CL12: #define __OPENCL_C_VERSION__ 120 + // CHECK-CL12-NOT: #define __FAST_RELAXED_MATH__ 1 + // CHECK-CL20: #define CL_VERSION_1_0 100 + // CHECK-CL20: #define CL_VERSION_1_1 110 + // CHECK-CL20: #define CL_VERSION_1_2 120 + // CHECK-CL20: #define CL_VERSION_2_0 200 ++// CHECK-CL20: #define CL_VERSION_3_0 300 + // CHECK-CL20: #define __OPENCL_C_VERSION__ 200 + // 
CHECK-CL20-NOT: #define __FAST_RELAXED_MATH__ 1 ++// CHECK-CL30: #define CL_VERSION_1_0 100 ++// CHECK-CL30: #define CL_VERSION_1_1 110 ++// CHECK-CL30: #define CL_VERSION_1_2 120 ++// CHECK-CL30: #define CL_VERSION_2_0 200 ++// CHECK-CL30: #define CL_VERSION_3_0 300 ++// CHECK-CL30: #define __OPENCL_C_VERSION__ 300 ++// CHECK-CL30-NOT: #define __FAST_RELAXED_MATH__ 1 + // CHECK-FRM: #define __FAST_RELAXED_MATH__ 1 + // CHECK-CLCPP10: #define __CL_CPP_VERSION_1_0__ 100 + // CHECK-CLCPP10: #define __OPENCL_CPP_VERSION__ 100 +diff --git a/clang/test/Sema/feature-extensions-simult-support.cl b/clang/test/Sema/feature-extensions-simult-support.cl +new file mode 100644 +index 000000000000..0789105002b2 +--- /dev/null ++++ b/clang/test/Sema/feature-extensions-simult-support.cl +@@ -0,0 +1,75 @@ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_subgroups ++ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_subgroups ++ ++// RUN: %clang_cc1 %s -verify 
-pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_device_enqueue ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_read_write_images ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64,-cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_3d_image_writes,-cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_subgroups,-cl_khr_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_subgroups ++ ++// expected-no-diagnostics ++ ++#ifdef cl_khr_fp64 ++ #ifndef __opencl_c_fp64 ++ #error macros were not properly set up ++ #endif ++#endif ++#ifdef __opencl_c_fp64 ++ #ifndef cl_khr_fp64 ++ #error macros were not properly set up ++ #endif ++#endif ++ ++#ifdef cl_khr_3d_image_writes ++ #ifndef __opencl_c_3d_image_writes ++ #error macros were not properly set up ++ #endif ++#endif ++#ifdef __opencl_c_3d_image_writes ++ #ifndef cl_khr_3d_image_writes ++ #error macros were not properly set up ++ #endif ++#endif ++ ++#ifdef cl_khr_subgroups ++ #ifndef __opencl_c_subgroups ++ #error macros were not properly set up ++ #endif ++#endif ++#ifdef __opencl_c_subgroups ++ #ifndef cl_khr_subgroups ++ #error macros were not properly set up ++ #endif ++#endif ++ ++#if 
defined(__opencl_c_pipes) || defined(__opencl_c_device_enqueue) ++ #ifndef __opencl_c_generic_address_space ++ #error macros were not properly set up ++ #endif ++#endif ++ ++#if defined(__opencl_c_3d_image_writes) || defined(__opencl_c_read_write_images) ++ #ifndef __opencl_c_images ++ #error macros were not properly set up ++ #endif ++#endif ++ ++kernel void test(){} +diff --git a/clang/test/Sema/features-ignore-pragma.cl b/clang/test/Sema/features-ignore-pragma.cl +new file mode 100644 +index 000000000000..046ce5390754 +--- /dev/null ++++ b/clang/test/Sema/features-ignore-pragma.cl +@@ -0,0 +1,24 @@ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_3d_image_writes ++ ++#pragma OPENCL EXTENSION __opencl_c_fp64 : enable ++// expected-warning@-1 {{OpenCL feature support can't be controlled via pragma, ignoring}} ++ ++#pragma OPENCL EXTENSION cl_khr_fp64 : enable ++#ifndef __opencl_c_fp64 ++// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_fp64' - ignoring}} ++#endif ++ ++#pragma OPENCL EXTENSION cl_khr_subgroups : enable ++#ifndef __opencl_c_subgroups ++// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_subgroups' - ignoring}} ++#endif ++ ++#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable ++#ifndef __opencl_c_3d_image_writes ++// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_3d_image_writes' - ignoring}} ++#endif ++ ++kernel void foo() {} +diff --git a/clang/test/Sema/opencl-features-pipes.cl b/clang/test/Sema/opencl-features-pipes.cl +new file mode 100644 +index 000000000000..c0ac778f24a6 +--- /dev/null ++++ b/clang/test/Sema/opencl-features-pipes.cl +@@ -0,0 +1,18 @@ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only 
-cl-std=CL1.1 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -DHAS ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -DHAS ++// expected-no-diagnostics ++ ++#ifdef HAS ++ #ifndef __opencl_c_pipes ++ #error Feature should be defined ++ #endif ++#else ++ #ifdef __opencl_c_pipes ++ #error Feature should not be defined ++ #endif ++#endif ++ ++kernel void foo() {} +diff --git a/clang/test/Sema/opencl-features.cl b/clang/test/Sema/opencl-features.cl +new file mode 100644 +index 000000000000..aa432f6b60bf +--- /dev/null ++++ b/clang/test/Sema/opencl-features.cl +@@ -0,0 +1,128 @@ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-__opencl_c_device_enqueue,-__opencl_c_pipes,-__opencl_c_read_write_images ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CLC++ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 ++// expected-no-diagnostics ++ ++#ifndef __opencl_c_int64 ++ #error Feature __opencl_c_int64 shouldn't be defined ++#endif ++ ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) ++ #ifndef __opencl_c_3d_image_writes ++ #error Feature __opencl_c_3d_image_writes should be defined ++ #endif ++ ++ #ifndef __opencl_c_atomic_order_acq_rel ++ #error Feature __opencl_c_atomic_order_acq_rel should be defined ++ #endif ++ ++ #ifndef __opencl_c_atomic_order_seq_cst ++ #error Feature __opencl_c_atomic_order_seq_cst should be defined ++ #endif ++ ++ #ifndef __opencl_c_atomic_scope_device ++ #error Feature __opencl_c_atomic_scope_device should be defined ++ #endif ++ ++ #ifndef __opencl_c_atomic_scope_all_devices ++ #error Feature __opencl_c_atomic_scope_all_devices should be defined ++ #endif ++ ++ #ifndef 
__opencl_c_device_enqueue ++ #error Feature __opencl_c_device_enqueue should be defined ++ #endif ++ ++ #ifndef __opencl_c_generic_address_space ++ #error Feature __opencl_c_generic_address_space should be defined ++ #endif ++ ++ #ifndef __opencl_c_pipes ++ #error Feature __opencl_c_pipes should be defined ++ #endif ++ ++ #ifndef __opencl_c_program_scope_global_variables ++ #error Feature __opencl_c_program_scope_global_variables should be defined ++ #endif ++ ++ #ifndef __opencl_c_read_write_images ++ #error Feature __opencl_c_read_write_images should be defined ++ #endif ++ ++ #ifndef __opencl_c_subgroups ++ #error Feature __opencl_c_subgroups should be defined ++ #endif ++ ++ #ifndef __opencl_c_work_group_collective_functions ++ #error Feature __opencl_c_work_group_collective_functions should be defined ++ #endif ++ ++ #ifndef __opencl_c_fp64 ++ #error Feature __opencl_c_fp64 should be defined ++ #endif ++ ++ #ifndef __opencl_c_images ++ #error Feature __opencl_c_images should be defined ++ #endif ++#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) ++ ++ ++#if __OPENCL_C_VERSION__ == CL_VERSION_3_0 ++ #ifdef __opencl_c_3d_image_writes ++ #error Feature __opencl_c_3d_image_writes shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_atomic_order_acq_rel ++ #error Feature __opencl_c_atomic_order_acq_rel shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_atomic_order_seq_cst ++ #error Feature __opencl_c_atomic_order_seq_cst shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_atomic_scope_device ++ #error Feature __opencl_c_atomic_scope_device shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_atomic_scope_all_devices ++ #error Feature __opencl_c_atomic_scope_all_devices shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_device_enqueue ++ #error Feature __opencl_c_device_enqueue shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_generic_address_space ++ #error Feature __opencl_c_generic_address_space shouldn't
be defined ++ #endif ++ ++ #ifdef __opencl_c_pipes ++ #error Feature __opencl_c_pipes shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_program_scope_global_variables ++ #error Feature __opencl_c_program_scope_global_variables shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_read_write_images ++ #error Feature __opencl_c_read_write_images shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_subgroups ++ #error Feature __opencl_c_subgroups shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_work_group_collective_functions ++ #error Feature __opencl_c_work_group_collective_functions shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_fp64 ++ #error Feature __opencl_c_fp64 shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_images ++ #error Feature __opencl_c_images shouldn't be defined ++ #endif ++#endif // __OPENCL_C_VERSION__ == CL_VERSION_3_0 ++ ++kernel void foo() {} +diff --git a/clang/test/Sema/pipe_builtins_feature.cl b/clang/test/Sema/pipe_builtins_feature.cl +new file mode 100644 +index 000000000000..56fa94fc7705 +--- /dev/null ++++ b/clang/test/Sema/pipe_builtins_feature.cl +@@ -0,0 +1,21 @@ ++// RUN: %clang_cc1 -cl-std=CL2.0 -fsyntax-only -verify %s ++// RUN: %clang_cc1 -cl-std=CL3.0 -fsyntax-only -verify %s ++// RUN: %clang_cc1 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -fsyntax-only -verify %s ++ ++#ifdef __opencl_c_pipes ++ #ifndef __opencl_c_generic_address_space ++ #error Generic address space feature must also be defined ++ #endif ++// CHECK: expected-no-diagnostics ++// check that pragma disable all doesn't touch feature support ++ #pragma OPENCL EXTENSION all : disable ++#endif ++ ++void test(read_only pipe int p, global int *ptr) { ++ reserve_id_t rid; ++} ++ ++#ifndef __opencl_c_pipes ++// expected-error@-5 {{expected parameter declarator}} expected-error@-5 {{expected ')'}} expected-note@-5 {{to match this '('}} ++// expected-error@-5 {{use of type 'reserve_id_t' requires __opencl_c_pipes extension to be enabled}} ++#endif 
+diff --git a/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl b/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl +index 5efea216346a..09aea27ae6de 100644 +--- a/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl ++++ b/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl +@@ -1,6 +1,9 @@ + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL2.0 + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL2.0 + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL2.0 ++// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space ++// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space ++// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=clc++ + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=clc++ + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=clc++ +diff --git a/clang/test/SemaOpenCL/address-spaces.cl b/clang/test/SemaOpenCL/address-spaces.cl +index 07547ea19680..e367a7c57292 100644 +--- a/clang/test/SemaOpenCL/address-spaces.cl ++++ b/clang/test/SemaOpenCL/address-spaces.cl +@@ -1,5 +1,6 @@ + // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only + // RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -pedantic -fsyntax-only ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -verify -pedantic -fsyntax-only + // RUN: %clang_cc1 %s -cl-std=clc++ -verify -pedantic -fsyntax-only + + __constant int ci = 1; +diff --git 
a/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl b/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl +index f63e2913c749..727141190a0b 100644 +--- a/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl ++++ b/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl +@@ -2,6 +2,12 @@ + // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir-unknown-unknown" -verify -pedantic -fsyntax-only -DB32 -DQUALS="const volatile" + // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS= + // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS="const volatile" ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -DB32 -DQUALS= ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -DB32 -DQUALS="const volatile" ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir64-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS= ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir64-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS="const volatile" ++ ++ + + typedef struct {int a;} ndrange_t; + // Diagnostic tests for different overloads of enqueue_kernel from Table 6.13.17.1 of OpenCL 2.0 Spec. 
+@@ -235,11 +241,17 @@ kernel void bar(global unsigned int *buf) + kernel void foo1(global unsigned int *buf) + { + ndrange_t n; +- buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} ++ buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); ++#if __OPENCL_C_VERSION__ < 300 ++// expected-error@-2 {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} ++#endif + } + + kernel void bar1(global unsigned int *buf) + { + ndrange_t n; +- buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} ++ buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); ++#if __OPENCL_C_VERSION__ < 300 ++// expected-error@-2 {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} ++#endif + } +diff --git a/clang/test/SemaOpenCL/forget-unsupported-builtins.cl b/clang/test/SemaOpenCL/forget-unsupported-builtins.cl +new file mode 100644 +index 000000000000..14dd03e2c7db +--- /dev/null ++++ b/clang/test/SemaOpenCL/forget-unsupported-builtins.cl +@@ -0,0 +1,22 @@ ++// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -verify %s -triple spir-unknown-unknown ++// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -cl-ext=__opencl_c_pipes,__opencl_c_generic_address_space,__opencl_c_device_enqueue -verify %s -triple spir-unknown-unknown -DFEATURES ++ ++#ifndef FEATURES ++ // expected-no-diagnostics ++#else ++ // expected-error@+10 {{cannot redeclare builtin function 'get_pipe_max_packets'}} ++ // expected-note@+9 {{'get_pipe_max_packets' is a builtin with type 'unsigned int ()'}} ++ // expected-error@+9 {{cannot redeclare builtin function 'to_local'}} ++ // expected-note@+8 {{'to_local' is a builtin with type 
'void *(void *)'}} ++ // expected-error@+8 {{cannot redeclare builtin function 'to_global'}} ++ // expected-note@+7 {{'to_global' is a builtin with type 'void *(void *)'}} ++ // expected-error@+7 {{cannot redeclare builtin function 'get_kernel_work_group_size'}} ++ // expected-note@+6 {{'get_kernel_work_group_size' is a builtin with type 'unsigned int ()'}} ++#endif ++ ++int get_pipe_max_packets(int); ++int to_local(int); ++int to_global(int); ++int get_kernel_work_group_size(int); ++ ++kernel void test(global int *dst) {} +diff --git a/clang/test/SemaOpenCL/image-features.cl b/clang/test/SemaOpenCL/image-features.cl +new file mode 100644 +index 000000000000..ace6913bb31e +--- /dev/null ++++ b/clang/test/SemaOpenCL/image-features.cl +@@ -0,0 +1,20 @@ ++// RUN: %clang_cc1 -cl-std=cl2.0 -fsyntax-only -verify %s -triple spir-unknown-unknown ++// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_images -fsyntax-only -verify %s -triple spir-unknown-unknown ++// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_3d_image_writes -fsyntax-only -verify %s -triple spir-unknown-unknown ++// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_read_write_images -fsyntax-only -verify %s -triple spir-unknown-unknown ++ ++#if defined(__opencl_c_read_write_images) && defined(__opencl_c_3d_image_writes) ++ // expected-no-diagnostics ++#endif ++ ++__kernel void write_3d_image(__write_only image3d_t i) {} ++ ++#ifndef __opencl_c_3d_image_writes ++ // expected-error@-3 {{use of type '__write_only image3d_t' requires __opencl_c_3d_image_writes extension to be enabled}} ++#endif ++ ++__kernel void read_write_3d_image(__read_write image3d_t i) { } ++ ++#ifndef __opencl_c_read_write_images ++ // expected-error@-3 {{use of type '__read_write image3d_t' requires __opencl_c_read_write_images extension to be enabled}} ++#endif +diff --git a/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl b/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl +index 36e76621d24a..38b0a04726e3 100644 +--- 
a/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl ++++ b/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_subgroups,__opencl_c_pipes + + #pragma OPENCL EXTENSION cl_khr_subgroups : enable + +diff --git a/clang/test/SemaOpenCL/storageclass-cl20.cl b/clang/test/SemaOpenCL/storageclass-cl20.cl +index 581701d2a6a5..469c526ebc30 100644 +--- a/clang/test/SemaOpenCL/storageclass-cl20.cl ++++ b/clang/test/SemaOpenCL/storageclass-cl20.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_program_scope_global_variables,__opencl_c_generic_address_space + + int G2 = 0; + global int G3 = 0; +diff --git a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp +index 41d33b550680..f50652493e5e 100644 +--- a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp ++++ b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp +@@ -56,6 +56,7 @@ + //===----------------------------------------------------------------------===// + + #include "TableGenBackends.h" ++#include "clang/Basic/OpenCLOptions.h" + #include "llvm/ADT/MapVector.h" + #include "llvm/ADT/STLExtras.h" + #include "llvm/ADT/SmallString.h" +@@ -69,6 +70,7 @@ + #include "llvm/TableGen/Record.h" + #include "llvm/TableGen/StringMatcher.h" + #include "llvm/TableGen/TableGenBackend.h" ++#include + #include + + using namespace llvm; +@@ -228,6 +230,10 @@ private: + // The function "tan", having the same signatures, would be mapped to the + // same entry (). 
+ MapVector SignatureListMap; ++ ++ // Encode all versions ++ unsigned short ++ EncodeBuiltinVersions(std::vector BuiltinVersionsRecords) const; + }; + } // namespace + +@@ -338,12 +344,10 @@ struct OpenCLBuiltinStruct { + const bool IsConst; + // Function attribute __attribute__((convergent)) + const bool IsConv; ++ // All opencl versions encoded ++ const unsigned char AllVersions : 5; + // OpenCL extension(s) required for this overload. + const unsigned short Extension; +- // First OpenCL version in which this overload was introduced (e.g. CL20). +- const unsigned short MinVersion; +- // First OpenCL version in which this overload was removed (e.g. CL20). +- const unsigned short MaxVersion; + }; + + )"; +@@ -503,11 +507,9 @@ void BuiltinNameEmitter::EmitBuiltinTable() { + << (Overload.first->getValueAsBit("IsPure")) << ", " + << (Overload.first->getValueAsBit("IsConst")) << ", " + << (Overload.first->getValueAsBit("IsConv")) << ", " +- << FunctionExtensionIndex[ExtName] << ", " +- << Overload.first->getValueAsDef("MinVersion")->getValueAsInt("ID") +- << ", " +- << Overload.first->getValueAsDef("MaxVersion")->getValueAsInt("ID") +- << " },\n"; ++ << EncodeBuiltinVersions( ++ Overload.first->getValueAsListOfDefs("Versions")) ++ << ", " << FunctionExtensionIndex[ExtName] << " },\n"; + Index++; + } + } +@@ -528,10 +530,8 @@ bool BuiltinNameEmitter::CanReuseSignature( + if (Rec->getValueAsBit("IsPure") == Rec2->getValueAsBit("IsPure") && + Rec->getValueAsBit("IsConst") == Rec2->getValueAsBit("IsConst") && + Rec->getValueAsBit("IsConv") == Rec2->getValueAsBit("IsConv") && +- Rec->getValueAsDef("MinVersion")->getValueAsInt("ID") == +- Rec2->getValueAsDef("MinVersion")->getValueAsInt("ID") && +- Rec->getValueAsDef("MaxVersion")->getValueAsInt("ID") == +- Rec2->getValueAsDef("MaxVersion")->getValueAsInt("ID") && ++ EncodeBuiltinVersions(Rec->getValueAsListOfDefs("Versions")) == ++ EncodeBuiltinVersions(Rec2->getValueAsListOfDefs("Versions")) && + 
Rec->getValueAsDef("Extension")->getName() == + Rec2->getValueAsDef("Extension")->getName()) { + return true; +@@ -806,6 +806,15 @@ static void OCL2Qual(ASTContext &Context, const OpenCLTypeStruct &Ty, + OS << "\n} // OCL2Qual\n"; + } + ++unsigned short BuiltinNameEmitter::EncodeBuiltinVersions( ++ std::vector BuiltinVersionsRecords) const { ++ return std::accumulate( ++ BuiltinVersionsRecords.begin(), BuiltinVersionsRecords.end(), ++ (unsigned short)0, [](unsigned short C, Record *R) { ++ return C |= clang::OpenCLOptions::EncodeVersion(R->getValueAsInt("ID")); ++ }); ++} ++ + void clang::EmitClangOpenCLBuiltins(RecordKeeper &Records, raw_ostream &OS) { + BuiltinNameEmitter NameChecker(Records, OS); + NameChecker.Emit(); +-- +2.17.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Remove-repo-name-in-LLVM-IR.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Remove-repo-name-in-LLVM-IR.patch new file mode 100644 index 00000000..232ae063 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Remove-repo-name-in-LLVM-IR.patch @@ -0,0 +1,50 @@ +From b53fd86ffdeacb9b13624bdb110fd25e8c35cb92 Mon Sep 17 00:00:00 2001 +From: Feng Zou +Date: Mon, 19 Oct 2020 14:43:38 +0800 +Subject: [PATCH] Remove repo name in LLVM IR + +Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0003-Remove-repo-name-in-LLVM-IR.patch] +Signed-off-by: Feng Zou +Signed-off-by: Naveen Saini + +--- + llvm/cmake/modules/VersionFromVCS.cmake | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake +index 1b6519b4b7c..8fd6b23bb34 100644 +--- a/llvm/cmake/modules/VersionFromVCS.cmake ++++ b/llvm/cmake/modules/VersionFromVCS.cmake +@@ -33,17 +33,18 @@ function(get_source_info path revision repository) + else() + set(remote "origin") + 
endif() +- execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote} +- WORKING_DIRECTORY ${path} +- RESULT_VARIABLE git_result +- OUTPUT_VARIABLE git_output +- ERROR_QUIET) +- if(git_result EQUAL 0) +- string(STRIP "${git_output}" git_output) +- set(${repository} ${git_output} PARENT_SCOPE) +- else() +- set(${repository} ${path} PARENT_SCOPE) +- endif() ++ # Do not show repo name in IR ++ # execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote} ++ # WORKING_DIRECTORY ${path} ++ # RESULT_VARIABLE git_result ++ # OUTPUT_VARIABLE git_output ++ # ERROR_QUIET) ++ # if(git_result EQUAL 0) ++ # string(STRIP "${git_output}" git_output) ++ # set(${repository} ${git_output} PARENT_SCOPE) ++ # else() ++ # set(${repository} ${path} PARENT_SCOPE) ++ # endif() + endif() + endif() + endfunction() +-- +2.18.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-OpenCL-3.0-support.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-OpenCL-3.0-support.patch new file mode 100644 index 00000000..98545db0 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-OpenCL-3.0-support.patch @@ -0,0 +1,8453 @@ +From d767afa79d1c8153081eac1ef33e348cadbea5bb Mon Sep 17 00:00:00 2001 +From: Anton Zabaznov +Date: Tue, 22 Sep 2020 19:03:50 +0300 +Subject: [PATCH] OpenCL 3.0 support + +Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-110/patches/clang/0001-OpenCL-3.0-support.patch] +Signed-off-by: Anton Zabaznov +Signed-off-by: Naveen Saini + + +--- + clang/include/clang/Basic/Builtins.def | 65 +- + clang/include/clang/Basic/Builtins.h | 13 +- + .../clang/Basic/DiagnosticParseKinds.td | 2 + + .../clang/Basic/DiagnosticSemaKinds.td | 7 + + clang/include/clang/Basic/LangOptions.def | 2 + + clang/include/clang/Basic/LangStandards.def | 4 + + .../include/clang/Basic/OpenCLExtensions.def | 15 + + clang/include/clang/Basic/OpenCLOptions.h | 174 +- + 
clang/include/clang/Driver/Options.td | 2 +- + clang/include/clang/Sema/Sema.h | 9 + + clang/lib/AST/ASTContext.cpp | 3 +- + clang/lib/Basic/Builtins.cpp | 28 +- + clang/lib/Basic/TargetInfo.cpp | 11 + + clang/lib/Basic/Targets.cpp | 1 - + clang/lib/CodeGen/CodeGenFunction.cpp | 6 +- + clang/lib/Frontend/CompilerInvocation.cpp | 22 +- + clang/lib/Frontend/InitPreprocessor.cpp | 6 +- + clang/lib/Headers/opencl-c-base.h | 75 +- + clang/lib/Headers/opencl-c.h | 3216 ++++++++++++++--- + clang/lib/Parse/ParseDecl.cpp | 12 +- + clang/lib/Parse/ParsePragma.cpp | 10 +- + clang/lib/Sema/OpenCLBuiltins.td | 153 +- + clang/lib/Sema/Sema.cpp | 47 +- + clang/lib/Sema/SemaChecking.cpp | 38 +- + clang/lib/Sema/SemaDecl.cpp | 18 +- + clang/lib/Sema/SemaDeclAttr.cpp | 9 +- + clang/lib/Sema/SemaDeclCXX.cpp | 12 +- + clang/lib/Sema/SemaLookup.cpp | 19 +- + clang/lib/Sema/SemaType.cpp | 14 +- + .../CodeGenOpenCL/addr-space-struct-arg.cl | 7 +- + .../address-spaces-conversions.cl | 2 + + .../CodeGenOpenCL/address-spaces-mangling.cl | 3 +- + clang/test/CodeGenOpenCL/address-spaces.cl | 4 + + .../amdgcn-automatic-variable.cl | 1 + + .../CodeGenOpenCL/amdgpu-sizeof-alignof.cl | 21 +- + .../CodeGenOpenCL/arm-integer-dot-product.cl | 1 + + .../test/CodeGenOpenCL/cl-uniform-wg-size.cl | 2 + + clang/test/CodeGenOpenCL/fpmath.cl | 2 + + .../generic-address-space-feature.cl | 28 + + .../intel-subgroups-avc-ext-types.cl | 1 + + .../kernels-have-spir-cc-by-default.cl | 3 + + clang/test/CodeGenOpenCL/logical-ops.cl | 1 + + clang/test/CodeGenOpenCL/no-half.cl | 1 + + clang/test/CodeGenOpenCL/pipe_builtin.cl | 3 + + clang/test/CodeGenOpenCL/pipe_types.cl | 1 + + clang/test/CodeGenOpenCL/printf.cl | 2 + + clang/test/CodeGenOpenCL/unroll-hint.cl | 1 + + clang/test/Driver/autocomplete.c | 2 + + clang/test/Driver/opencl.cl | 2 + + clang/test/Driver/unknown-std.cl | 1 + + clang/test/Frontend/stdlang.c | 1 + + clang/test/Headers/opencl-c-header.cl | 7 +- + clang/test/Index/pipe-size.cl | 7 + + 
clang/test/Preprocessor/init-aarch64.c | 12 +- + clang/test/Preprocessor/predefined-macros.c | 13 + + .../Sema/feature-extensions-simult-support.cl | 75 + + clang/test/Sema/features-ignore-pragma.cl | 24 + + clang/test/Sema/opencl-features-pipes.cl | 18 + + clang/test/Sema/opencl-features.cl | 128 + + clang/test/Sema/pipe_builtins_feature.cl | 21 + + .../address-spaces-conversions-cl2.0.cl | 3 + + clang/test/SemaOpenCL/address-spaces.cl | 1 + + .../SemaOpenCL/cl20-device-side-enqueue.cl | 16 +- + .../SemaOpenCL/forget-unsupported-builtins.cl | 22 + + clang/test/SemaOpenCL/image-features.cl | 20 + + .../SemaOpenCL/invalid-pipe-builtin-cl2.0.cl | 1 + + clang/test/SemaOpenCL/storageclass-cl20.cl | 1 + + .../TableGen/ClangOpenCLBuiltinEmitter.cpp | 35 +- + 68 files changed, 3749 insertions(+), 738 deletions(-) + create mode 100644 clang/test/CodeGenOpenCL/generic-address-space-feature.cl + create mode 100644 clang/test/Sema/feature-extensions-simult-support.cl + create mode 100644 clang/test/Sema/features-ignore-pragma.cl + create mode 100644 clang/test/Sema/opencl-features-pipes.cl + create mode 100644 clang/test/Sema/opencl-features.cl + create mode 100644 clang/test/Sema/pipe_builtins_feature.cl + create mode 100644 clang/test/SemaOpenCL/forget-unsupported-builtins.cl + create mode 100644 clang/test/SemaOpenCL/image-features.cl + +diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def +index 5463b7dfc18c..e094beab9a6c 100644 +--- a/clang/include/clang/Basic/Builtins.def ++++ b/clang/include/clang/Basic/Builtins.def +@@ -111,6 +111,10 @@ + # define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) + #endif + ++#if defined(BUILTIN) && !defined(OPENCLBUILTIN) ++# define OPENCLBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS) ++#endif ++ + // Standard libc/libm functions: + BUILTIN(__builtin_atan2 , "ddd" , "Fne") + BUILTIN(__builtin_atan2f, "fff" , "Fne") +@@ -1529,50 +1533,54 @@ 
BUILTIN(__builtin_coro_param, "bv*v*", "n") + + // OpenCL v2.0 s6.13.16, s9.17.3.5 - Pipe functions. + // We need the generic prototype, since the packet type could be anything. +-LANGBUILTIN(read_pipe, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(write_pipe, "i.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(reserve_read_pipe, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(reserve_write_pipe, "i.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(commit_write_pipe, "v.", "tn", OCLC20_LANG) +-LANGBUILTIN(commit_read_pipe, "v.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) +-LANGBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(work_group_commit_read_pipe, "v.", "tn", 
OCLC20_LANG) +-LANGBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + +-LANGBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_pipes") ++OPENCLBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_pipes") + + // OpenCL v2.0 s6.13.17 - Enqueue kernel functions. + // Custom builtin check allows to perform special check of passed block arguments. +-LANGBUILTIN(enqueue_kernel, "i.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", OCLC20_LANG) +-LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC20_LANG) ++OPENCLBUILTIN(enqueue_kernel, "i.", "tn", OCLC2P_LANG, ++ "__opencl_c_device_enqueue") ++OPENCLBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC2P_LANG, ++ "__opencl_c_device_enqueue") ++OPENCLBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", ++ OCLC2P_LANG, "__opencl_c_device_enqueue") ++OPENCLBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", ++ OCLC2P_LANG, "__opencl_c_device_enqueue") ++OPENCLBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_device_enqueue") + + // OpenCL v2.0 s6.13.9 - Address space qualifier functions. + // FIXME: Pointer parameters of OpenCL builtins should have their address space + // requirement defined. 
+-LANGBUILTIN(to_global, "v*v*", "tn", OCLC20_LANG) +-LANGBUILTIN(to_local, "v*v*", "tn", OCLC20_LANG) +-LANGBUILTIN(to_private, "v*v*", "tn", OCLC20_LANG) ++OPENCLBUILTIN(to_global, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") ++OPENCLBUILTIN(to_local, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") ++OPENCLBUILTIN(to_private, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") + + // OpenCL half load/store builtin +-LANGBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES) +-LANGBUILTIN(__builtin_store_halff, "vfh*", "n", ALL_OCLC_LANGUAGES) +-LANGBUILTIN(__builtin_load_half, "dhC*", "nc", ALL_OCLC_LANGUAGES) +-LANGBUILTIN(__builtin_load_halff, "fhC*", "nc", ALL_OCLC_LANGUAGES) ++OPENCLBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES, "") ++OPENCLBUILTIN(__builtin_store_halff, "vfh*", "n", ALL_OCLC_LANGUAGES, "") ++OPENCLBUILTIN(__builtin_load_half, "dhC*", "nc", ALL_OCLC_LANGUAGES, "") ++OPENCLBUILTIN(__builtin_load_halff, "fhC*", "nc", ALL_OCLC_LANGUAGES, "") + + // Builtins for os_log/os_trace + BUILTIN(__builtin_os_log_format_buffer_size, "zcC*.", "p:0:nut") +@@ -1593,3 +1601,4 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") + #undef BUILTIN + #undef LIBBUILTIN + #undef LANGBUILTIN ++#undef OPENCLBUILTIN +diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h +index 15bfcf797917..ba3e7853444c 100644 +--- a/clang/include/clang/Basic/Builtins.h ++++ b/clang/include/clang/Basic/Builtins.h +@@ -33,13 +33,13 @@ enum LanguageID { + CXX_LANG = 0x4, // builtin for cplusplus only. + OBJC_LANG = 0x8, // builtin for objective-c and objective-c++ + MS_LANG = 0x10, // builtin requires MS mode. +- OCLC20_LANG = 0x20, // builtin for OpenCL C 2.0 only. ++ OCLC2P_LANG = 0x20, // builtin for OpenCL C 2.0+ versions. + OCLC1X_LANG = 0x40, // builtin for OpenCL C 1.x only. + OMP_LANG = 0x80, // builtin requires OpenMP. 
+ ALL_LANGUAGES = C_LANG | CXX_LANG | OBJC_LANG, // builtin for all languages. + ALL_GNU_LANGUAGES = ALL_LANGUAGES | GNU_LANG, // builtin requires GNU mode. + ALL_MS_LANGUAGES = ALL_LANGUAGES | MS_LANG, // builtin requires MS mode. +- ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC20_LANG // builtin for OCLC languages. ++ ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC2P_LANG // builtin for OCLC languages. + }; + + namespace Builtin { +@@ -235,6 +235,10 @@ public: + /// for non-builtins. + bool canBeRedeclared(unsigned ID) const; + ++ bool requiresFeatures(unsigned ID) const { ++ return requiresFeatures(getRecord(ID)); ++ } ++ + private: + const Info &getRecord(unsigned ID) const; + +@@ -242,6 +246,11 @@ private: + bool builtinIsSupported(const Builtin::Info &BuiltinInfo, + const LangOptions &LangOpts); + ++ bool OclBuiltinIsSupported(const Builtin::Info &BuiltinInfo, ++ const LangOptions &LangOpts) const; ++ ++ bool requiresFeatures(const Builtin::Info &BuiltinInfo) const; ++ + /// Helper function for isPrintfLike and isScanfLike. + bool isLike(unsigned ID, unsigned &FormatIdx, bool &HasVAListArg, + const char *Fmt) const; +diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td +index 1038a4119d4c..ba1b1b67a131 100644 +--- a/clang/include/clang/Basic/DiagnosticParseKinds.td ++++ b/clang/include/clang/Basic/DiagnosticParseKinds.td +@@ -1206,6 +1206,8 @@ def warn_pragma_unsupported_extension : Warning< + "unsupported OpenCL extension %0 - ignoring">, InGroup; + def warn_pragma_extension_is_core : Warning< + "OpenCL extension %0 is core feature or supported optional core feature - ignoring">, InGroup>, DefaultIgnore; ++def warn_opencl_pragma_feature_ignore : Warning< ++ "OpenCL feature support can't be controlled via pragma, ignoring">, InGroup; + + // OpenCL errors. 
+ def err_opencl_taking_function_address_parser : Error< +diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td +index 941f2cafc372..aa9e8b7b011b 100644 +--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td ++++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td +@@ -9741,6 +9741,13 @@ def ext_opencl_ext_vector_type_rgba_selector: ExtWarn< + def err_openclcxx_placement_new : Error< + "use of placement new requires explicit declaration">; + ++def err_opencl_pipes_require_feat : Error< ++ "usage of OpenCL pipes requires feature support">; ++def err_opencl_memory_scope_require_feat : Error< ++ "usage of memory scope requires feature support">; ++def err_opencl_memory_ordering_require_feat : Error< ++ "usage of memory ordering requires feature support">; ++ + // MIG routine annotations. + def warn_mig_server_routine_does_not_return_kern_return_t : Warning< + "'mig_server_routine' attribute only applies to routines that return a kern_return_t">, +diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def +index 70f68d664bb7..e683f5956975 100644 +--- a/clang/include/clang/Basic/LangOptions.def ++++ b/clang/include/clang/Basic/LangOptions.def +@@ -215,6 +215,8 @@ LANGOPT(OpenCL , 1, 0, "OpenCL") + LANGOPT(OpenCLVersion , 32, 0, "OpenCL C version") + LANGOPT(OpenCLCPlusPlus , 1, 0, "C++ for OpenCL") + LANGOPT(OpenCLCPlusPlusVersion , 32, 0, "C++ for OpenCL version") ++LANGOPT(OpenCLGenericKeyword , 1, 0, "OpenCL generic keyword") ++LANGOPT(OpenCLPipeKeyword , 1, 0, "OpenCL pipe keyword") + LANGOPT(NativeHalfType , 1, 0, "Native half type support") + LANGOPT(NativeHalfArgsAndReturns, 1, 0, "Native half args and returns") + LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns") +diff --git a/clang/include/clang/Basic/LangStandards.def b/clang/include/clang/Basic/LangStandards.def +index b09568e8b3e8..7b915c312746 100644 +--- a/clang/include/clang/Basic/LangStandards.def 
++++ b/clang/include/clang/Basic/LangStandards.def +@@ -167,6 +167,9 @@ LANGSTANDARD(opencl12, "cl1.2", + LANGSTANDARD(opencl20, "cl2.0", + OpenCL, "OpenCL 2.0", + LineComment | C99 | Digraphs | HexFloat | OpenCL) ++LANGSTANDARD(opencl30, "cl3.0", ++ OpenCL, "OpenCL 3.0", ++ LineComment | C99 | Digraphs | HexFloat | OpenCL) + LANGSTANDARD(openclcpp, "clc++", + OpenCL, "C++ for OpenCL", + LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 | +@@ -176,6 +179,7 @@ LANGSTANDARD_ALIAS_DEPR(opencl10, "CL") + LANGSTANDARD_ALIAS_DEPR(opencl11, "CL1.1") + LANGSTANDARD_ALIAS_DEPR(opencl12, "CL1.2") + LANGSTANDARD_ALIAS_DEPR(opencl20, "CL2.0") ++LANGSTANDARD_ALIAS_DEPR(opencl30, "CL3.0") + LANGSTANDARD_ALIAS_DEPR(openclcpp, "CLC++") + + // CUDA +diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def +index 1ae36b32fb0a..d1574164f9b2 100644 +--- a/clang/include/clang/Basic/OpenCLExtensions.def ++++ b/clang/include/clang/Basic/OpenCLExtensions.def +@@ -100,6 +100,21 @@ OPENCLEXT_INTERNAL(cl_intel_subgroups, 120, ~0U) + OPENCLEXT_INTERNAL(cl_intel_subgroups_short, 120, ~0U) + OPENCLEXT_INTERNAL(cl_intel_device_side_avc_motion_estimation, 120, ~0U) + ++OPENCLEXT_INTERNAL(__opencl_c_pipes, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_generic_address_space, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_work_group_collective_functions, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_atomic_order_acq_rel, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_atomic_order_seq_cst, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_atomic_scope_device, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_atomic_scope_all_devices, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_subgroups, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_3d_image_writes, 100, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_device_enqueue, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_read_write_images, 200, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_program_scope_global_variables, 200, ~0U) 
++OPENCLEXT_INTERNAL(__opencl_c_fp64, 120, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_int64, 100, ~0U) ++OPENCLEXT_INTERNAL(__opencl_c_images, 100, ~0U) + #undef OPENCLEXT_INTERNAL + + #ifdef OPENCLEXT +diff --git a/clang/include/clang/Basic/OpenCLOptions.h b/clang/include/clang/Basic/OpenCLOptions.h +index 15661154eab5..d5800f5611c6 100644 +--- a/clang/include/clang/Basic/OpenCLOptions.h ++++ b/clang/include/clang/Basic/OpenCLOptions.h +@@ -21,6 +21,10 @@ namespace clang { + + /// OpenCL supported extensions and optional core features + class OpenCLOptions { ++ // OpenCL Version ++ unsigned CLVer = 120; ++ bool IsOpenCLCPlusPlus = false; ++ + struct Info { + bool Supported; // Is this option supported + bool Enabled; // Is this option enabled +@@ -31,7 +35,38 @@ class OpenCLOptions { + :Supported(S), Enabled(E), Avail(A), Core(C){} + }; + llvm::StringMap OptMap; ++ + public: ++ void setOpenCLVersion(const LangOptions &LO) { ++ IsOpenCLCPlusPlus = LO.OpenCLCPlusPlus; ++ CLVer = IsOpenCLCPlusPlus ? 200 : LO.OpenCLVersion; ++ } ++ ++ // Get extension which is semantically equivalent to a given feature ++ // if exists (e.g. 
__opencl_c_subgroups -> cl_khr_subgroups) ++ llvm::Optional getEquivalentExtension(StringRef Feature) const { ++ return llvm::StringSwitch>(Feature) ++ .Case("__opencl_c_3d_image_writes", ++ Optional("cl_khr_3d_image_writes")) ++ .Case("__opencl_c_subgroups", Optional("cl_khr_subgroups")) ++ .Case("__opencl_c_fp64", Optional("cl_khr_fp64")) ++ .Default(Optional()); ++ } ++ ++ // Same as above but for extensions ++ llvm::Optional getEquivalentFeature(StringRef Extension) const { ++ return llvm::StringSwitch>(Extension) ++ .Case("cl_khr_3d_image_writes", ++ Optional("__opencl_c_3d_image_writes")) ++ .Case("cl_khr_subgroups", Optional("__opencl_c_subgroups")) ++ .Case("cl_khr_fp64", Optional("__opencl_c_fp64")) ++ .Default(Optional()); ++ } ++ ++ bool isFeature(llvm::StringRef Ext) const { ++ return Ext.startswith("__opencl_c"); ++ } ++ + bool isKnown(llvm::StringRef Ext) const { + return OptMap.find(Ext) != OptMap.end(); + } +@@ -42,32 +77,88 @@ public: + + // Is supported as either an extension or an (optional) core feature for + // OpenCL version \p CLVer. +- bool isSupported(llvm::StringRef Ext, const LangOptions &LO) const { ++ bool isSupported(llvm::StringRef Ext) const { + // In C++ mode all extensions should work at least as in v2.0. +- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; + auto I = OptMap.find(Ext)->getValue(); + return I.Supported && I.Avail <= CLVer; + } + + // Is supported (optional) OpenCL core features for OpenCL version \p CLVer. + // For supported extension, return false. +- bool isSupportedCore(llvm::StringRef Ext, const LangOptions &LO) const { ++ bool isSupportedCore(llvm::StringRef Ext) const { + // In C++ mode all extensions should work at least as in v2.0. +- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; + auto I = OptMap.find(Ext)->getValue(); + return I.Supported && I.Avail <= CLVer && I.Core != ~0U && CLVer >= I.Core; + } + + // Is supported OpenCL extension for OpenCL version \p CLVer. 
+ // For supported (optional) core feature, return false. +- bool isSupportedExtension(llvm::StringRef Ext, const LangOptions &LO) const { ++ bool isSupportedExtension(llvm::StringRef Ext) const { + // In C++ mode all extensions should work at least as in v2.0. +- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; + auto I = OptMap.find(Ext)->getValue(); + return I.Supported && I.Avail <= CLVer && (I.Core == ~0U || CLVer < I.Core); + } + ++ // Support features whose support is directly related to the ++ // specific OpenCL version. For example, OpenCL 2.0 supports ++ // all features that are optional in 3.0 ++ void adjustFeatures() { ++ // Support int64 by default (assume compiling for FULL profile) ++ OptMap["__opencl_c_int64"].Supported = true; ++ ++ if (CLVer >= 300) { ++ // Simultaneously support extension and corresponding feature ++ for (llvm::StringRef F : ++ {"__opencl_c_subgroups", "__opencl_c_3d_image_writes", ++ "__opencl_c_fp64"}) { ++ auto Ext = getEquivalentExtension(F); ++ OptMap[*Ext].Supported = OptMap[F].Supported; ++ } ++ ++ // OpenCL C compilers that define the feature macro __opencl_c_pipes or ++ // or __opencl_c_device_enqueue must also define the ++ // feature macro __opencl_c_generic_address_space. 
++ if (OptMap["__opencl_c_pipes"].Supported || ++ OptMap["__opencl_c_device_enqueue"].Supported) ++ OptMap["__opencl_c_generic_address_space"].Supported = true; ++ ++ // OpenCL C compilers that define the feature macro ++ // __opencl_c_3d_image_writes or __opencl_c_read_write_images must also ++ // define the feature macro __opencl_c_images ++ if (OptMap["__opencl_c_3d_image_writes"].Supported || ++ OptMap["__opencl_c_read_write_images"].Supported) ++ OptMap["__opencl_c_images"].Supported = true; ++ ++ // All other features are already supported with options ++ // or in target settings ++ return; ++ } ++ ++ auto FeaturesRange = llvm::make_filter_range( ++ OptMap, [&](llvm::StringMapEntry &OptVal) { ++ auto Opt = OptVal.getKey(); ++ return isFeature(Opt); ++ }); ++ ++ for (auto &It : FeaturesRange) { ++ auto &Info = It.getValue(); ++ // For OpenCL version less than 3.0 some ++ // features should be supported simultaneously ++ // with specific extension ++ if (Optional Ext = getEquivalentExtension(It.getKey())) ++ Info.Supported = Info.Enabled = OptMap[*Ext].Supported; ++ else if (Info.Avail <= CLVer) ++ Info.Supported = Info.Enabled = true; ++ } ++ } ++ + void enable(llvm::StringRef Ext, bool V = true) { ++ // Ignore disabling extensions if corresponding features ++ // already supported for OpenCL version 3.0 and higher ++ if (CLVer >= 300) ++ if (Optional F = getEquivalentFeature(Ext)) ++ if (V != OptMap[*F].Enabled) ++ return; + OptMap[Ext].Enabled = V; + } + +@@ -96,7 +187,7 @@ public: + OptMap[Ext].Supported = V; + } + +- OpenCLOptions(){ ++ OpenCLOptions() { + #define OPENCLEXT_INTERNAL(Ext, AvailVer, CoreVer) \ + OptMap[#Ext].Avail = AvailVer; \ + OptMap[#Ext].Core = CoreVer; +@@ -104,35 +195,86 @@ public: + } + + void addSupport(const OpenCLOptions &Opts) { ++ assert(IsOpenCLCPlusPlus == Opts.IsOpenCLCPlusPlus && CLVer == Opts.CLVer); + for (auto &I:Opts.OptMap) +- if (I.second.Supported) ++ if (I.second.Supported) { + OptMap[I.getKey()].Supported = true; 
++ // All features are enabled as they are supported ++ if (isFeature(I.getKey())) ++ OptMap[I.getKey()].Enabled = true; ++ } ++ if (CLVer >= 300) { ++ // Enabling extensions with respect to features ++ for (llvm::StringRef Ext : ++ {"cl_khr_3d_image_writes", "cl_khr_subgroups", "cl_khr_fp64"}) { ++ auto Feature = getEquivalentFeature(Ext); ++ enable(Ext, OptMap[*Feature].Enabled); ++ } ++ } + } + + void copy(const OpenCLOptions &Opts) { ++ CLVer = Opts.CLVer; ++ IsOpenCLCPlusPlus = Opts.IsOpenCLCPlusPlus; + OptMap = Opts.OptMap; + } + + // Turn on or off support of all options. + void supportAll(bool On = true) { +- for (llvm::StringMap::iterator I = OptMap.begin(), +- E = OptMap.end(); I != E; ++I) +- I->second.Supported = On; ++ for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); ++ I != E; ++I) ++ if (!isFeature(I->getKey())) ++ I->second.Supported = On; + } + + void disableAll() { +- for (llvm::StringMap::iterator I = OptMap.begin(), +- E = OptMap.end(); I != E; ++I) +- I->second.Enabled = false; ++ for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); ++ I != E; ++I) { ++ auto Ext = I->getKey(); ++ if (!isFeature(Ext)) ++ enable(Ext, false); ++ } + } + +- void enableSupportedCore(LangOptions LO) { ++ void enableSupportedCore() { + for (llvm::StringMap::iterator I = OptMap.begin(), E = OptMap.end(); + I != E; ++I) +- if (isSupportedCore(I->getKey(), LO)) ++ if (isSupportedCore(I->getKey())) + I->second.Enabled = true; + } + ++ // This enum specifies how OpenCL versions map into values ++ // for encoding. 
This is used when generating built-ins ++ // from tablegen ++ enum OpenCLVersionsEncodings : unsigned short { ++ OPENCL_C_100_CODE = 0x1, ++ OPENCL_C_110_CODE = 0x2, ++ OPENCL_C_120_CODE = 0x4, ++ OPENCL_C_200_CODE = 0x8, ++ OPENCL_C_300_CODE = 0x10, ++ OPENCL_C_ALL_CODE = 0x1f ++ }; ++ ++ // Encode version into single integer ++ static unsigned short EncodeVersion(unsigned OpenCLVersion) { ++ switch (OpenCLVersion) { ++ default: ++ llvm_unreachable("Unknown OpenCL version"); ++ case 0: ++ return OpenCLVersionsEncodings::OPENCL_C_ALL_CODE; ++ case 100: ++ return OpenCLVersionsEncodings::OPENCL_C_100_CODE; ++ case 110: ++ return OpenCLVersionsEncodings::OPENCL_C_110_CODE; ++ case 120: ++ return OpenCLVersionsEncodings::OPENCL_C_120_CODE; ++ case 200: ++ return OpenCLVersionsEncodings::OPENCL_C_200_CODE; ++ case 300: ++ return OpenCLVersionsEncodings::OPENCL_C_300_CODE; ++ } ++ } ++ + friend class ASTWriter; + friend class ASTReader; + }; +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index 966cb907b7e2..62536f2470c2 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -557,7 +557,7 @@ def cl_mad_enable : Flag<["-"], "cl-mad-enable">, Group, Flags<[CC + def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group, Flags<[CC1Option]>, + HelpText<"OpenCL only. Allow use of less precise no signed zeros computations in the generated binary.">; + def cl_std_EQ : Joined<["-"], "cl-std=">, Group, Flags<[CC1Option]>, +- HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,clc++,CLC++">; ++ HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0,clc++,CLC++">; + def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group, + HelpText<"OpenCL only. 
Allow denormals to be flushed to zero.">; + def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group, Flags<[CC1Option]>, +diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h +index 16a7084f6b08..5920c27229d8 100644 +--- a/clang/include/clang/Sema/Sema.h ++++ b/clang/include/clang/Sema/Sema.h +@@ -9930,6 +9930,10 @@ public: + /// \return true if type is disabled. + bool checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E); + ++ bool checkOpenCLSubgroupExtForCallExpr(CallExpr *Call); ++ ++ bool isSupportedOpenCLOMemoryOrdering(int64_t Ordering) const; ++ + //===--------------------------------------------------------------------===// + // OpenMP directives and clauses. + // +@@ -11527,6 +11531,11 @@ public: + /// that the user intended an assignment used as condition. + void DiagnoseEqualityWithExtraParens(ParenExpr *ParenE); + ++ template ++ void DiagnoseOpenCLRequiresOption(llvm::StringRef OpenCLOptName, ++ DiagLocT DiagLoc, DiagInfoT DiagInfo, ++ unsigned Selector, SourceRange SrcRange); ++ + /// CheckCXXBooleanCondition - Returns true if conversion to bool is invalid. 
+ ExprResult CheckCXXBooleanCondition(Expr *CondExpr, bool IsConstexpr = false); + +diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp +index bf51d35d9693..a54ea8475d7d 100644 +--- a/clang/lib/AST/ASTContext.cpp ++++ b/clang/lib/AST/ASTContext.cpp +@@ -1433,7 +1433,8 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, + ObjCSuperType = QualType(); + + // void * type +- if (LangOpts.OpenCLVersion >= 200) { ++ if (Target.getSupportedOpenCLOpts().isEnabled( ++ "__opencl_c_generic_address_space")) { + auto Q = VoidTy.getQualifiers(); + Q.setAddressSpace(LangAS::opencl_generic); + VoidPtrTy = getPointerType(getCanonicalType( +diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp +index 0cd89df41b67..7a3067345098 100644 +--- a/clang/lib/Basic/Builtins.cpp ++++ b/clang/lib/Basic/Builtins.cpp +@@ -23,6 +23,8 @@ static const Builtin::Info BuiltinInfo[] = { + { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr }, + #define LANGBUILTIN(ID, TYPE, ATTRS, LANGS) \ + { #ID, TYPE, ATTRS, nullptr, LANGS, nullptr }, ++#define OPENCLBUILTIN(ID, TYPE, ATTRS, LANGS, FEATURE) \ ++ {#ID, TYPE, ATTRS, nullptr, LANGS, FEATURE}, + #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, LANGS) \ + { #ID, TYPE, ATTRS, HEADER, LANGS, nullptr }, + #include "clang/Basic/Builtins.def" +@@ -69,16 +71,17 @@ bool Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo, + bool ObjCUnsupported = !LangOpts.ObjC && BuiltinInfo.Langs == OBJC_LANG; + bool OclC1Unsupported = (LangOpts.OpenCLVersion / 100) != 1 && + (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES ) == OCLC1X_LANG; +- bool OclC2Unsupported = +- (LangOpts.OpenCLVersion != 200 && !LangOpts.OpenCLCPlusPlus) && +- (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC20_LANG; ++ bool OclC2PUnsupported = ++ (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC2P_LANG && ++ ((LangOpts.OpenCLVersion < 200 && !LangOpts.OpenCLCPlusPlus) || ++ !OclBuiltinIsSupported(BuiltinInfo, LangOpts)); + bool OclCUnsupported = 
!LangOpts.OpenCL && + (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES); + bool OpenMPUnsupported = !LangOpts.OpenMP && BuiltinInfo.Langs == OMP_LANG; + bool CPlusPlusUnsupported = + !LangOpts.CPlusPlus && BuiltinInfo.Langs == CXX_LANG; + return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported && +- !OclC1Unsupported && !OclC2Unsupported && !OpenMPUnsupported && ++ !OclC1Unsupported && !OclC2PUnsupported && !OpenMPUnsupported && + !GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported && + !CPlusPlusUnsupported; + } +@@ -191,3 +194,20 @@ bool Builtin::Context::canBeRedeclared(unsigned ID) const { + (!hasReferenceArgsOrResult(ID) && + !hasCustomTypechecking(ID)); + } ++ ++bool Builtin::Context::OclBuiltinIsSupported( ++ const Builtin::Info &BuiltinInfo, const LangOptions &LangOpts) const { ++ if (!requiresFeatures(BuiltinInfo)) ++ return true; ++ ++ return llvm::StringSwitch(BuiltinInfo.Features) ++ .Case("__opencl_c_device_enqueue", LangOpts.Blocks) ++ .Case("__opencl_c_generic_address_space", LangOpts.OpenCLGenericKeyword) ++ .Case("__opencl_c_pipes", LangOpts.OpenCLPipeKeyword) ++ .Default(false); ++} ++ ++bool Builtin::Context::requiresFeatures( ++ const Builtin::Info &BuiltinInfo) const { ++ return BuiltinInfo.Features && llvm::StringRef(BuiltinInfo.Features) != ""; ++} +diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp +index eccdc21d724a..6b4f524d5271 100644 +--- a/clang/lib/Basic/TargetInfo.cpp ++++ b/clang/lib/Basic/TargetInfo.cpp +@@ -387,6 +387,17 @@ void TargetInfo::adjust(LangOptions &Opts) { + HalfFormat = &llvm::APFloat::IEEEhalf(); + FloatFormat = &llvm::APFloat::IEEEsingle(); + LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ ++ auto &SupportedOCLOpts = getTargetOpts().SupportedOpenCLOptions; ++ ++ SupportedOCLOpts.setOpenCLVersion(Opts); ++ SupportedOCLOpts.adjustFeatures(); ++ ++ if (!Opts.OpenCLCPlusPlus && Opts.OpenCLVersion >= 200) ++ Opts.Blocks = 
SupportedOCLOpts.isSupported("__opencl_c_device_enqueue"); ++ Opts.OpenCLGenericKeyword = ++ SupportedOCLOpts.isSupported("__opencl_c_generic_address_space"); ++ Opts.OpenCLPipeKeyword = SupportedOCLOpts.isSupported("__opencl_c_pipes"); + } + + if (Opts.DoubleSize) { +diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp +index 965f273892bd..046d7c15f835 100644 +--- a/clang/lib/Basic/Targets.cpp ++++ b/clang/lib/Basic/Targets.cpp +@@ -40,7 +40,6 @@ + #include "clang/Basic/Diagnostic.h" + #include "llvm/ADT/StringExtras.h" + #include "llvm/ADT/Triple.h" +- + using namespace clang; + + namespace clang { +diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp +index 8ce488f35dd3..ca4cb0fbf018 100644 +--- a/clang/lib/CodeGen/CodeGenFunction.cpp ++++ b/clang/lib/CodeGen/CodeGenFunction.cpp +@@ -2306,11 +2306,11 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, + std::string MissingFeature; + if (BuiltinID) { + SmallVector ReqFeatures; +- const char *FeatureList = +- CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); + // Return if the builtin doesn't have any required features. 
+- if (!FeatureList || StringRef(FeatureList) == "") ++ if (!CGM.getContext().BuiltinInfo.requiresFeatures(BuiltinID)) + return; ++ const char *FeatureList = ++ CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); + StringRef(FeatureList).split(ReqFeatures, ','); + if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature)) + CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature) +diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp +index 73114c6d76cb..bc54d231b97a 100644 +--- a/clang/lib/Frontend/CompilerInvocation.cpp ++++ b/clang/lib/Frontend/CompilerInvocation.cpp +@@ -2344,6 +2344,8 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, + Opts.OpenCLVersion = 120; + else if (LangStd == LangStandard::lang_opencl20) + Opts.OpenCLVersion = 200; ++ else if (LangStd == LangStandard::lang_opencl30) ++ Opts.OpenCLVersion = 300; + else if (LangStd == LangStandard::lang_openclcpp) + Opts.OpenCLCPlusPlusVersion = 100; + +@@ -2544,14 +2546,15 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, + // -cl-std only applies for OpenCL language standards. + // Override the -std option in this case. 
+ if (const Arg *A = Args.getLastArg(OPT_cl_std_EQ)) { +- LangStandard::Kind OpenCLLangStd +- = llvm::StringSwitch(A->getValue()) +- .Cases("cl", "CL", LangStandard::lang_opencl10) +- .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) +- .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) +- .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) +- .Cases("clc++", "CLC++", LangStandard::lang_openclcpp) +- .Default(LangStandard::lang_unspecified); ++ LangStandard::Kind OpenCLLangStd = ++ llvm::StringSwitch(A->getValue()) ++ .Cases("cl", "CL", LangStandard::lang_opencl10) ++ .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) ++ .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) ++ .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) ++ .Cases("cl3.0", "CL3.0", LangStandard::lang_opencl30) ++ .Cases("clc++", "CLC++", LangStandard::lang_openclcpp) ++ .Default(LangStandard::lang_unspecified); + + if (OpenCLLangStd == LangStandard::lang_unspecified) { + Diags.Report(diag::err_drv_invalid_value) +@@ -2852,8 +2855,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, + + Opts.RTTI = Opts.CPlusPlus && !Args.hasArg(OPT_fno_rtti); + Opts.RTTIData = Opts.RTTI && !Args.hasArg(OPT_fno_rtti_data); +- Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL +- && Opts.OpenCLVersion == 200); ++ Opts.Blocks = Args.hasArg(OPT_fblocks); + Opts.BlocksRuntimeOptional = Args.hasArg(OPT_fblocks_runtime_optional); + Opts.Coroutines = Opts.CPlusPlus20 || Args.hasArg(OPT_fcoroutines_ts); + +diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp +index 6eef1e2376f6..5bb489c11909 100644 +--- a/clang/lib/Frontend/InitPreprocessor.cpp ++++ b/clang/lib/Frontend/InitPreprocessor.cpp +@@ -445,6 +445,9 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, + case 200: + Builder.defineMacro("__OPENCL_C_VERSION__", "200"); + break; ++ case 300: ++ Builder.defineMacro("__OPENCL_C_VERSION__", "300"); ++ break; + 
default: + llvm_unreachable("Unsupported OpenCL version"); + } +@@ -453,6 +456,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, + Builder.defineMacro("CL_VERSION_1_1", "110"); + Builder.defineMacro("CL_VERSION_1_2", "120"); + Builder.defineMacro("CL_VERSION_2_0", "200"); ++ Builder.defineMacro("CL_VERSION_3_0", "300"); + + if (TI.isLittleEndian()) + Builder.defineMacro("__ENDIAN_LITTLE__"); +@@ -1108,7 +1112,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, + // OpenCL definitions. + if (LangOpts.OpenCL) { + #define OPENCLEXT(Ext) \ +- if (TI.getSupportedOpenCLOpts().isSupported(#Ext, LangOpts)) \ ++ if (TI.getSupportedOpenCLOpts().isSupported(#Ext)) \ + Builder.defineMacro(#Ext); + #include "clang/Basic/OpenCLExtensions.def" + +diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h +index 430e07d36f62..afa900ab24d9 100644 +--- a/clang/lib/Headers/opencl-c-base.h ++++ b/clang/lib/Headers/opencl-c-base.h +@@ -9,6 +9,59 @@ + #ifndef _OPENCL_BASE_H_ + #define _OPENCL_BASE_H_ + ++// Add predefined macros to build headers with standalone executable ++#ifndef CL_VERSION_3_0 ++ #define CL_VERSION_3_0 300 ++#endif ++ ++// Define features for 2.0 for header backward compatibility ++#ifndef __opencl_c_int64 ++ #define __opencl_c_int64 1 ++#endif ++#if __OPENCL_C_VERSION__ != CL_VERSION_3_0 ++ #ifndef __opencl_c_images ++ #define __opencl_c_images 1 ++ #endif ++#endif ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) ++#ifndef __opencl_c_pipes ++ #define __opencl_c_pipes 1 ++#endif ++#ifndef __opencl_c_generic_address_space ++ #define __opencl_c_generic_address_space 1 ++#endif ++#ifndef __opencl_c_work_group_collective_functions ++ #define __opencl_c_work_group_collective_functions 1 ++#endif ++#ifndef __opencl_c_atomic_order_acq_rel ++ #define __opencl_c_atomic_order_acq_rel 1 ++#endif ++#ifndef __opencl_c_atomic_order_seq_cst ++ #define __opencl_c_atomic_order_seq_cst 1 
++#endif ++#ifndef __opencl_c_atomic_scope_device ++ #define __opencl_c_atomic_scope_device 1 ++#endif ++#ifndef __opencl_c_atomic_scope_all_devices ++ #define __opencl_c_atomic_scope_all_devices 1 ++#endif ++#ifndef __opencl_c_subgroups ++ #define __opencl_c_subgroups 1 ++#endif ++#ifndef __opencl_c_3d_image_writes ++ #define __opencl_c_3d_image_writes 1 ++#endif ++#ifndef __opencl_c_device_enqueue ++ #define __opencl_c_device_enqueue 1 ++#endif ++#ifndef __opencl_c_read_write_images ++ #define __opencl_c_read_write_images 1 ++#endif ++#ifndef __opencl_c_program_scope_global_variables ++ #define __opencl_c_program_scope_global_variables 1 ++#endif ++#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) ++ + // built-in scalar data types: + + /** +@@ -115,7 +168,12 @@ typedef half half4 __attribute__((ext_vector_type(4))); + typedef half half8 __attribute__((ext_vector_type(8))); + typedef half half16 __attribute__((ext_vector_type(16))); + #endif +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++ ++#ifndef __opencl_c_fp64 ++ #define __opencl_c_fp64 1 ++#endif ++ + #if __OPENCL_C_VERSION__ < CL_VERSION_1_2 + #pragma OPENCL EXTENSION cl_khr_fp64 : enable + #endif +@@ -281,9 +339,17 @@ typedef uint cl_mem_fence_flags; + typedef enum memory_scope { + memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, ++#ifdef __opencl_c_atomic_scope_device + memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, ++#endif ++#ifdef __opencl_c_atomic_scope_all_devices ++ #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ memory_scope_all_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, ++ #endif //(__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, +-#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) ++#endif // __opencl_c_atomic_scope_all_devices ++#if defined(cl_intel_subgroups) || 
defined(cl_khr_subgroups) || \ ++ defined(__opencl_c_subgroups) + memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP + #endif + } memory_scope; +@@ -301,13 +367,14 @@ typedef enum memory_scope { + #define ATOMIC_FLAG_INIT 0 + + // enum values aligned with what clang uses in EmitAtomicExpr() +-typedef enum memory_order +-{ ++typedef enum memory_order { + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, ++#ifdef __opencl_c_atomic_order_seq_cst + memory_order_seq_cst = __ATOMIC_SEQ_CST ++#endif //__opencl_c_atomic_order_seq_cst + } memory_order; + + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h +index 66e18bdd47bb..67d900eb1c3d 100644 +--- a/clang/lib/Headers/opencl-c.h ++++ b/clang/lib/Headers/opencl-c.h +@@ -35,7 +35,6 @@ + #define __purefn __attribute__((pure)) + #define __cnfn __attribute__((const)) + +- + // OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions + + char __ovld __cnfn convert_char_rte(char); +@@ -4632,7 +4631,7 @@ float16 __ovld __cnfn convert_float16(float16); + + // Conversions with double data type parameters or return value. + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + char __ovld __cnfn convert_char(double); + char __ovld __cnfn convert_char_rte(double); + char __ovld __cnfn convert_char_rtn(double); +@@ -5452,7 +5451,7 @@ double16 __ovld __cnfn convert_double16_rtz(uchar16); + double16 __ovld __cnfn convert_double16_rtz(uint16); + double16 __ovld __cnfn convert_double16_rtz(ulong16); + double16 __ovld __cnfn convert_double16_rtz(ushort16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + // Convert half types to non-double types. 
+@@ -6270,7 +6269,7 @@ half16 __ovld __cnfn convert_half16_rtz(float16); + half16 __ovld __cnfn convert_half16_rtz(half16); + + // Convert half types to double types. +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn convert_double(half); + double __ovld __cnfn convert_double_rte(half); + double __ovld __cnfn convert_double_rtp(half); +@@ -6333,7 +6332,7 @@ half16 __ovld __cnfn convert_half16_rte(double16); + half16 __ovld __cnfn convert_half16_rtp(double16); + half16 __ovld __cnfn convert_half16_rtn(double16); + half16 __ovld __cnfn convert_half16_rtz(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #endif // cl_khr_fp16 + +@@ -6404,14 +6403,14 @@ half16 __ovld __cnfn convert_half16_rtz(double16); + #define as_float8(x) __builtin_astype((x), float8) + #define as_float16(x) __builtin_astype((x), float16) + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #define as_double(x) __builtin_astype((x), double) + #define as_double2(x) __builtin_astype((x), double2) + #define as_double3(x) __builtin_astype((x), double3) + #define as_double4(x) __builtin_astype((x), double4) + #define as_double8(x) __builtin_astype((x), double8) + #define as_double16(x) __builtin_astype((x), double16) +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + #define as_half(x) __builtin_astype((x), half) +@@ -6534,14 +6533,14 @@ float3 __ovld __cnfn acos(float3); + float4 __ovld __cnfn acos(float4); + float8 __ovld __cnfn acos(float8); + float16 __ovld __cnfn acos(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn acos(double); + double2 __ovld __cnfn acos(double2); + double3 __ovld __cnfn acos(double3); + double4 __ovld __cnfn acos(double4); + double8 __ovld __cnfn acos(double8); + double16 __ovld __cnfn acos(double16); +-#endif //cl_khr_fp64 ++#endif // 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn acos(half); + half2 __ovld __cnfn acos(half2); +@@ -6560,14 +6559,14 @@ float3 __ovld __cnfn acosh(float3); + float4 __ovld __cnfn acosh(float4); + float8 __ovld __cnfn acosh(float8); + float16 __ovld __cnfn acosh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn acosh(double); + double2 __ovld __cnfn acosh(double2); + double3 __ovld __cnfn acosh(double3); + double4 __ovld __cnfn acosh(double4); + double8 __ovld __cnfn acosh(double8); + double16 __ovld __cnfn acosh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn acosh(half); + half2 __ovld __cnfn acosh(half2); +@@ -6586,14 +6585,14 @@ float3 __ovld __cnfn acospi(float3 x); + float4 __ovld __cnfn acospi(float4 x); + float8 __ovld __cnfn acospi(float8 x); + float16 __ovld __cnfn acospi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn acospi(double x); + double2 __ovld __cnfn acospi(double2 x); + double3 __ovld __cnfn acospi(double3 x); + double4 __ovld __cnfn acospi(double4 x); + double8 __ovld __cnfn acospi(double8 x); + double16 __ovld __cnfn acospi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn acospi(half x); + half2 __ovld __cnfn acospi(half2 x); +@@ -6612,14 +6611,14 @@ float3 __ovld __cnfn asin(float3); + float4 __ovld __cnfn asin(float4); + float8 __ovld __cnfn asin(float8); + float16 __ovld __cnfn asin(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn asin(double); + double2 __ovld __cnfn asin(double2); + double3 __ovld __cnfn asin(double3); + double4 __ovld __cnfn asin(double4); + double8 __ovld __cnfn asin(double8); + double16 __ovld __cnfn asin(double16); 
+-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn asin(half); + half2 __ovld __cnfn asin(half2); +@@ -6638,14 +6637,14 @@ float3 __ovld __cnfn asinh(float3); + float4 __ovld __cnfn asinh(float4); + float8 __ovld __cnfn asinh(float8); + float16 __ovld __cnfn asinh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn asinh(double); + double2 __ovld __cnfn asinh(double2); + double3 __ovld __cnfn asinh(double3); + double4 __ovld __cnfn asinh(double4); + double8 __ovld __cnfn asinh(double8); + double16 __ovld __cnfn asinh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn asinh(half); + half2 __ovld __cnfn asinh(half2); +@@ -6664,14 +6663,14 @@ float3 __ovld __cnfn asinpi(float3 x); + float4 __ovld __cnfn asinpi(float4 x); + float8 __ovld __cnfn asinpi(float8 x); + float16 __ovld __cnfn asinpi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn asinpi(double x); + double2 __ovld __cnfn asinpi(double2 x); + double3 __ovld __cnfn asinpi(double3 x); + double4 __ovld __cnfn asinpi(double4 x); + double8 __ovld __cnfn asinpi(double8 x); + double16 __ovld __cnfn asinpi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn asinpi(half x); + half2 __ovld __cnfn asinpi(half2 x); +@@ -6690,14 +6689,14 @@ float3 __ovld __cnfn atan(float3 y_over_x); + float4 __ovld __cnfn atan(float4 y_over_x); + float8 __ovld __cnfn atan(float8 y_over_x); + float16 __ovld __cnfn atan(float16 y_over_x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atan(double y_over_x); + double2 __ovld __cnfn atan(double2 y_over_x); + double3 __ovld __cnfn atan(double3 y_over_x); + double4 __ovld __cnfn 
atan(double4 y_over_x); + double8 __ovld __cnfn atan(double8 y_over_x); + double16 __ovld __cnfn atan(double16 y_over_x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atan(half y_over_x); + half2 __ovld __cnfn atan(half2 y_over_x); +@@ -6716,14 +6715,14 @@ float3 __ovld __cnfn atan2(float3 y, float3 x); + float4 __ovld __cnfn atan2(float4 y, float4 x); + float8 __ovld __cnfn atan2(float8 y, float8 x); + float16 __ovld __cnfn atan2(float16 y, float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atan2(double y, double x); + double2 __ovld __cnfn atan2(double2 y, double2 x); + double3 __ovld __cnfn atan2(double3 y, double3 x); + double4 __ovld __cnfn atan2(double4 y, double4 x); + double8 __ovld __cnfn atan2(double8 y, double8 x); + double16 __ovld __cnfn atan2(double16 y, double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atan2(half y, half x); + half2 __ovld __cnfn atan2(half2 y, half2 x); +@@ -6742,14 +6741,14 @@ float3 __ovld __cnfn atanh(float3); + float4 __ovld __cnfn atanh(float4); + float8 __ovld __cnfn atanh(float8); + float16 __ovld __cnfn atanh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atanh(double); + double2 __ovld __cnfn atanh(double2); + double3 __ovld __cnfn atanh(double3); + double4 __ovld __cnfn atanh(double4); + double8 __ovld __cnfn atanh(double8); + double16 __ovld __cnfn atanh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atanh(half); + half2 __ovld __cnfn atanh(half2); +@@ -6768,14 +6767,14 @@ float3 __ovld __cnfn atanpi(float3 x); + float4 __ovld __cnfn atanpi(float4 x); + float8 __ovld __cnfn atanpi(float8 x); + float16 __ovld __cnfn atanpi(float16 x); +-#ifdef 
cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atanpi(double x); + double2 __ovld __cnfn atanpi(double2 x); + double3 __ovld __cnfn atanpi(double3 x); + double4 __ovld __cnfn atanpi(double4 x); + double8 __ovld __cnfn atanpi(double8 x); + double16 __ovld __cnfn atanpi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atanpi(half x); + half2 __ovld __cnfn atanpi(half2 x); +@@ -6794,14 +6793,14 @@ float3 __ovld __cnfn atan2pi(float3 y, float3 x); + float4 __ovld __cnfn atan2pi(float4 y, float4 x); + float8 __ovld __cnfn atan2pi(float8 y, float8 x); + float16 __ovld __cnfn atan2pi(float16 y, float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn atan2pi(double y, double x); + double2 __ovld __cnfn atan2pi(double2 y, double2 x); + double3 __ovld __cnfn atan2pi(double3 y, double3 x); + double4 __ovld __cnfn atan2pi(double4 y, double4 x); + double8 __ovld __cnfn atan2pi(double8 y, double8 x); + double16 __ovld __cnfn atan2pi(double16 y, double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn atan2pi(half y, half x); + half2 __ovld __cnfn atan2pi(half2 y, half2 x); +@@ -6820,14 +6819,14 @@ float3 __ovld __cnfn cbrt(float3); + float4 __ovld __cnfn cbrt(float4); + float8 __ovld __cnfn cbrt(float8); + float16 __ovld __cnfn cbrt(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn cbrt(double); + double2 __ovld __cnfn cbrt(double2); + double3 __ovld __cnfn cbrt(double3); + double4 __ovld __cnfn cbrt(double4); + double8 __ovld __cnfn cbrt(double8); + double16 __ovld __cnfn cbrt(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn cbrt(half); + half2 __ovld __cnfn 
cbrt(half2); +@@ -6847,14 +6846,14 @@ float3 __ovld __cnfn ceil(float3); + float4 __ovld __cnfn ceil(float4); + float8 __ovld __cnfn ceil(float8); + float16 __ovld __cnfn ceil(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn ceil(double); + double2 __ovld __cnfn ceil(double2); + double3 __ovld __cnfn ceil(double3); + double4 __ovld __cnfn ceil(double4); + double8 __ovld __cnfn ceil(double8); + double16 __ovld __cnfn ceil(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn ceil(half); + half2 __ovld __cnfn ceil(half2); +@@ -6873,14 +6872,14 @@ float3 __ovld __cnfn copysign(float3 x, float3 y); + float4 __ovld __cnfn copysign(float4 x, float4 y); + float8 __ovld __cnfn copysign(float8 x, float8 y); + float16 __ovld __cnfn copysign(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn copysign(double x, double y); + double2 __ovld __cnfn copysign(double2 x, double2 y); + double3 __ovld __cnfn copysign(double3 x, double3 y); + double4 __ovld __cnfn copysign(double4 x, double4 y); + double8 __ovld __cnfn copysign(double8 x, double8 y); + double16 __ovld __cnfn copysign(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn copysign(half x, half y); + half2 __ovld __cnfn copysign(half2 x, half2 y); +@@ -6899,14 +6898,14 @@ float3 __ovld __cnfn cos(float3); + float4 __ovld __cnfn cos(float4); + float8 __ovld __cnfn cos(float8); + float16 __ovld __cnfn cos(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn cos(double); + double2 __ovld __cnfn cos(double2); + double3 __ovld __cnfn cos(double3); + double4 __ovld __cnfn cos(double4); + double8 __ovld __cnfn cos(double8); + double16 __ovld __cnfn cos(double16); 
+-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn cos(half); + half2 __ovld __cnfn cos(half2); +@@ -6925,14 +6924,14 @@ float3 __ovld __cnfn cosh(float3); + float4 __ovld __cnfn cosh(float4); + float8 __ovld __cnfn cosh(float8); + float16 __ovld __cnfn cosh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn cosh(double); + double2 __ovld __cnfn cosh(double2); + double3 __ovld __cnfn cosh(double3); + double4 __ovld __cnfn cosh(double4); + double8 __ovld __cnfn cosh(double8); + double16 __ovld __cnfn cosh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn cosh(half); + half2 __ovld __cnfn cosh(half2); +@@ -6951,14 +6950,14 @@ float3 __ovld __cnfn cospi(float3 x); + float4 __ovld __cnfn cospi(float4 x); + float8 __ovld __cnfn cospi(float8 x); + float16 __ovld __cnfn cospi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn cospi(double x); + double2 __ovld __cnfn cospi(double2 x); + double3 __ovld __cnfn cospi(double3 x); + double4 __ovld __cnfn cospi(double4 x); + double8 __ovld __cnfn cospi(double8 x); + double16 __ovld __cnfn cospi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn cospi(half x); + half2 __ovld __cnfn cospi(half2 x); +@@ -6977,14 +6976,14 @@ float3 __ovld __cnfn erfc(float3); + float4 __ovld __cnfn erfc(float4); + float8 __ovld __cnfn erfc(float8); + float16 __ovld __cnfn erfc(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn erfc(double); + double2 __ovld __cnfn erfc(double2); + double3 __ovld __cnfn erfc(double3); + double4 __ovld __cnfn erfc(double4); + double8 __ovld __cnfn erfc(double8); + double16 __ovld __cnfn 
erfc(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn erfc(half); + half2 __ovld __cnfn erfc(half2); +@@ -7004,14 +7003,14 @@ float3 __ovld __cnfn erf(float3); + float4 __ovld __cnfn erf(float4); + float8 __ovld __cnfn erf(float8); + float16 __ovld __cnfn erf(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn erf(double); + double2 __ovld __cnfn erf(double2); + double3 __ovld __cnfn erf(double3); + double4 __ovld __cnfn erf(double4); + double8 __ovld __cnfn erf(double8); + double16 __ovld __cnfn erf(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn erf(half); + half2 __ovld __cnfn erf(half2); +@@ -7030,14 +7029,14 @@ float3 __ovld __cnfn exp(float3 x); + float4 __ovld __cnfn exp(float4 x); + float8 __ovld __cnfn exp(float8 x); + float16 __ovld __cnfn exp(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn exp(double x); + double2 __ovld __cnfn exp(double2 x); + double3 __ovld __cnfn exp(double3 x); + double4 __ovld __cnfn exp(double4 x); + double8 __ovld __cnfn exp(double8 x); + double16 __ovld __cnfn exp(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn exp(half x); + half2 __ovld __cnfn exp(half2 x); +@@ -7056,14 +7055,14 @@ float3 __ovld __cnfn exp2(float3); + float4 __ovld __cnfn exp2(float4); + float8 __ovld __cnfn exp2(float8); + float16 __ovld __cnfn exp2(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn exp2(double); + double2 __ovld __cnfn exp2(double2); + double3 __ovld __cnfn exp2(double3); + double4 __ovld __cnfn exp2(double4); + double8 __ovld __cnfn exp2(double8); + double16 __ovld __cnfn exp2(double16); +-#endif 
//cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn exp2(half); + half2 __ovld __cnfn exp2(half2); +@@ -7082,14 +7081,14 @@ float3 __ovld __cnfn exp10(float3); + float4 __ovld __cnfn exp10(float4); + float8 __ovld __cnfn exp10(float8); + float16 __ovld __cnfn exp10(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn exp10(double); + double2 __ovld __cnfn exp10(double2); + double3 __ovld __cnfn exp10(double3); + double4 __ovld __cnfn exp10(double4); + double8 __ovld __cnfn exp10(double8); + double16 __ovld __cnfn exp10(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn exp10(half); + half2 __ovld __cnfn exp10(half2); +@@ -7108,14 +7107,14 @@ float3 __ovld __cnfn expm1(float3 x); + float4 __ovld __cnfn expm1(float4 x); + float8 __ovld __cnfn expm1(float8 x); + float16 __ovld __cnfn expm1(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn expm1(double x); + double2 __ovld __cnfn expm1(double2 x); + double3 __ovld __cnfn expm1(double3 x); + double4 __ovld __cnfn expm1(double4 x); + double8 __ovld __cnfn expm1(double8 x); + double16 __ovld __cnfn expm1(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn expm1(half x); + half2 __ovld __cnfn expm1(half2 x); +@@ -7134,14 +7133,14 @@ float3 __ovld __cnfn fabs(float3); + float4 __ovld __cnfn fabs(float4); + float8 __ovld __cnfn fabs(float8); + float16 __ovld __cnfn fabs(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fabs(double); + double2 __ovld __cnfn fabs(double2); + double3 __ovld __cnfn fabs(double3); + double4 __ovld __cnfn fabs(double4); + double8 __ovld __cnfn fabs(double8); + double16 __ovld __cnfn 
fabs(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fabs(half); + half2 __ovld __cnfn fabs(half2); +@@ -7160,14 +7159,14 @@ float3 __ovld __cnfn fdim(float3 x, float3 y); + float4 __ovld __cnfn fdim(float4 x, float4 y); + float8 __ovld __cnfn fdim(float8 x, float8 y); + float16 __ovld __cnfn fdim(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fdim(double x, double y); + double2 __ovld __cnfn fdim(double2 x, double2 y); + double3 __ovld __cnfn fdim(double3 x, double3 y); + double4 __ovld __cnfn fdim(double4 x, double4 y); + double8 __ovld __cnfn fdim(double8 x, double8 y); + double16 __ovld __cnfn fdim(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fdim(half x, half y); + half2 __ovld __cnfn fdim(half2 x, half2 y); +@@ -7187,14 +7186,14 @@ float3 __ovld __cnfn floor(float3); + float4 __ovld __cnfn floor(float4); + float8 __ovld __cnfn floor(float8); + float16 __ovld __cnfn floor(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn floor(double); + double2 __ovld __cnfn floor(double2); + double3 __ovld __cnfn floor(double3); + double4 __ovld __cnfn floor(double4); + double8 __ovld __cnfn floor(double8); + double16 __ovld __cnfn floor(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn floor(half); + half2 __ovld __cnfn floor(half2); +@@ -7217,14 +7216,14 @@ float3 __ovld __cnfn fma(float3 a, float3 b, float3 c); + float4 __ovld __cnfn fma(float4 a, float4 b, float4 c); + float8 __ovld __cnfn fma(float8 a, float8 b, float8 c); + float16 __ovld __cnfn fma(float16 a, float16 b, float16 c); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + 
double __ovld __cnfn fma(double a, double b, double c); + double2 __ovld __cnfn fma(double2 a, double2 b, double2 c); + double3 __ovld __cnfn fma(double3 a, double3 b, double3 c); + double4 __ovld __cnfn fma(double4 a, double4 b, double4 c); + double8 __ovld __cnfn fma(double8 a, double8 b, double8 c); + double16 __ovld __cnfn fma(double16 a, double16 b, double16 c); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fma(half a, half b, half c); + half2 __ovld __cnfn fma(half2 a, half2 b, half2 c); +@@ -7251,7 +7250,7 @@ float3 __ovld __cnfn fmax(float3 x, float y); + float4 __ovld __cnfn fmax(float4 x, float y); + float8 __ovld __cnfn fmax(float8 x, float y); + float16 __ovld __cnfn fmax(float16 x, float y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fmax(double x, double y); + double2 __ovld __cnfn fmax(double2 x, double2 y); + double3 __ovld __cnfn fmax(double3 x, double3 y); +@@ -7263,7 +7262,7 @@ double3 __ovld __cnfn fmax(double3 x, double y); + double4 __ovld __cnfn fmax(double4 x, double y); + double8 __ovld __cnfn fmax(double8 x, double y); + double16 __ovld __cnfn fmax(double16 x, double y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fmax(half x, half y); + half2 __ovld __cnfn fmax(half2 x, half2 y); +@@ -7295,7 +7294,7 @@ float3 __ovld __cnfn fmin(float3 x, float y); + float4 __ovld __cnfn fmin(float4 x, float y); + float8 __ovld __cnfn fmin(float8 x, float y); + float16 __ovld __cnfn fmin(float16 x, float y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fmin(double x, double y); + double2 __ovld __cnfn fmin(double2 x, double2 y); + double3 __ovld __cnfn fmin(double3 x, double3 y); +@@ -7307,7 +7306,7 @@ double3 __ovld __cnfn fmin(double3 x, double y); + double4 __ovld __cnfn fmin(double4 x, 
double y); + double8 __ovld __cnfn fmin(double8 x, double y); + double16 __ovld __cnfn fmin(double16 x, double y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fmin(half x, half y); + half2 __ovld __cnfn fmin(half2 x, half2 y); +@@ -7331,14 +7330,14 @@ float3 __ovld __cnfn fmod(float3 x, float3 y); + float4 __ovld __cnfn fmod(float4 x, float4 y); + float8 __ovld __cnfn fmod(float8 x, float8 y); + float16 __ovld __cnfn fmod(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn fmod(double x, double y); + double2 __ovld __cnfn fmod(double2 x, double2 y); + double3 __ovld __cnfn fmod(double3 x, double3 y); + double4 __ovld __cnfn fmod(double4 x, double4 y); + double8 __ovld __cnfn fmod(double8 x, double8 y); + double16 __ovld __cnfn fmod(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn fmod(half x, half y); + half2 __ovld __cnfn fmod(half2 x, half2 y); +@@ -7352,21 +7351,21 @@ half16 __ovld __cnfn fmod(half16 x, half16 y); + * Returns fmin(x - floor (x), 0x1.fffffep-1f ). + * floor(x) is returned in iptr. 
+ */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld fract(float x, float *iptr); + float2 __ovld fract(float2 x, float2 *iptr); + float3 __ovld fract(float3 x, float3 *iptr); + float4 __ovld fract(float4 x, float4 *iptr); + float8 __ovld fract(float8 x, float8 *iptr); + float16 __ovld fract(float16 x, float16 *iptr); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld fract(double x, double *iptr); + double2 __ovld fract(double2 x, double2 *iptr); + double3 __ovld fract(double3 x, double3 *iptr); + double4 __ovld fract(double4 x, double4 *iptr); + double8 __ovld fract(double8 x, double8 *iptr); + double16 __ovld fract(double16 x, double16 *iptr); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld fract(half x, half *iptr); + half2 __ovld fract(half2 x, half2 *iptr); +@@ -7375,7 +7374,9 @@ half4 __ovld fract(half4 x, half4 *iptr); + half8 __ovld fract(half8 x, half8 *iptr); + half16 __ovld fract(half16 x, half16 *iptr); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld fract(float x, __global float *iptr); + float2 __ovld fract(float2 x, __global float2 *iptr); + float3 __ovld fract(float3 x, __global float3 *iptr); +@@ -7394,7 +7395,7 @@ float3 __ovld fract(float3 x, __private float3 *iptr); + float4 __ovld fract(float4 x, __private float4 *iptr); + float8 __ovld fract(float8 x, __private float8 *iptr); + float16 __ovld fract(float16 x, __private float16 *iptr); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld fract(double x, __global double *iptr); + double2 __ovld fract(double2 x, __global double2 *iptr); + double3 __ovld fract(double3 x, __global double3 *iptr); +@@ -7413,7 +7414,7 @@ double3 __ovld 
fract(double3 x, __private double3 *iptr); + double4 __ovld fract(double4 x, __private double4 *iptr); + double8 __ovld fract(double8 x, __private double8 *iptr); + double16 __ovld fract(double16 x, __private double16 *iptr); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld fract(half x, __global half *iptr); + half2 __ovld fract(half2 x, __global half2 *iptr); +@@ -7434,29 +7435,29 @@ half4 __ovld fract(half4 x, __private half4 *iptr); + half8 __ovld fract(half8 x, __private half8 *iptr); + half16 __ovld fract(half16 x, __private half16 *iptr); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +- ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + /** + * Extract mantissa and exponent from x. For each + * component the mantissa returned is a float with + * magnitude in the interval [1/2, 1) or 0. Each + * component of x equals mantissa returned * 2^exp. 
+ */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld frexp(float x, int *exp); + float2 __ovld frexp(float2 x, int2 *exp); + float3 __ovld frexp(float3 x, int3 *exp); + float4 __ovld frexp(float4 x, int4 *exp); + float8 __ovld frexp(float8 x, int8 *exp); + float16 __ovld frexp(float16 x, int16 *exp); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld frexp(double x, int *exp); + double2 __ovld frexp(double2 x, int2 *exp); + double3 __ovld frexp(double3 x, int3 *exp); + double4 __ovld frexp(double4 x, int4 *exp); + double8 __ovld frexp(double8 x, int8 *exp); + double16 __ovld frexp(double16 x, int16 *exp); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld frexp(half x, int *exp); + half2 __ovld frexp(half2 x, int2 *exp); +@@ -7465,7 +7466,9 @@ half4 __ovld frexp(half4 x, int4 *exp); + half8 __ovld frexp(half8 x, int8 *exp); + half16 __ovld frexp(half16 x, int16 *exp); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld frexp(float x, __global int *exp); + float2 __ovld frexp(float2 x, __global int2 *exp); + float3 __ovld frexp(float3 x, __global int3 *exp); +@@ -7484,7 +7487,7 @@ float3 __ovld frexp(float3 x, __private int3 *exp); + float4 __ovld frexp(float4 x, __private int4 *exp); + float8 __ovld frexp(float8 x, __private int8 *exp); + float16 __ovld frexp(float16 x, __private int16 *exp); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld frexp(double x, __global int *exp); + double2 __ovld frexp(double2 x, __global int2 *exp); + double3 __ovld frexp(double3 x, __global int3 *exp); +@@ -7503,7 +7506,7 @@ double3 __ovld frexp(double3 x, __private int3 *exp); + double4 __ovld frexp(double4 x, __private 
int4 *exp); + double8 __ovld frexp(double8 x, __private int8 *exp); + double16 __ovld frexp(double16 x, __private int16 *exp); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld frexp(half x, __global int *exp); + half2 __ovld frexp(half2 x, __global int2 *exp); +@@ -7524,7 +7527,8 @@ half4 __ovld frexp(half4 x, __private int4 *exp); + half8 __ovld frexp(half8 x, __private int8 *exp); + half16 __ovld frexp(half16 x, __private int16 *exp); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + + /** + * Compute the value of the square root of x^2 + y^2 +@@ -7536,14 +7540,14 @@ float3 __ovld __cnfn hypot(float3 x, float3 y); + float4 __ovld __cnfn hypot(float4 x, float4 y); + float8 __ovld __cnfn hypot(float8 x, float8 y); + float16 __ovld __cnfn hypot(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn hypot(double x, double y); + double2 __ovld __cnfn hypot(double2 x, double2 y); + double3 __ovld __cnfn hypot(double3 x, double3 y); + double4 __ovld __cnfn hypot(double4 x, double4 y); + double8 __ovld __cnfn hypot(double8 x, double8 y); + double16 __ovld __cnfn hypot(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn hypot(half x, half y); + half2 __ovld __cnfn hypot(half2 x, half2 y); +@@ -7562,14 +7566,14 @@ int3 __ovld __cnfn ilogb(float3 x); + int4 __ovld __cnfn ilogb(float4 x); + int8 __ovld __cnfn ilogb(float8 x); + int16 __ovld __cnfn ilogb(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn ilogb(double x); + int2 __ovld __cnfn ilogb(double2 x); + int3 __ovld __cnfn ilogb(double3 x); + int4 __ovld __cnfn ilogb(double4 
x); + int8 __ovld __cnfn ilogb(double8 x); + int16 __ovld __cnfn ilogb(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn ilogb(half x); + int2 __ovld __cnfn ilogb(half2 x); +@@ -7593,7 +7597,7 @@ float3 __ovld __cnfn ldexp(float3 x, int n); + float4 __ovld __cnfn ldexp(float4 x, int n); + float8 __ovld __cnfn ldexp(float8 x, int n); + float16 __ovld __cnfn ldexp(float16 x, int n); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn ldexp(double x, int n); + double2 __ovld __cnfn ldexp(double2 x, int2 n); + double3 __ovld __cnfn ldexp(double3 x, int3 n); +@@ -7605,7 +7609,7 @@ double3 __ovld __cnfn ldexp(double3 x, int n); + double4 __ovld __cnfn ldexp(double4 x, int n); + double8 __ovld __cnfn ldexp(double8 x, int n); + double16 __ovld __cnfn ldexp(double16 x, int n); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn ldexp(half x, int n); + half2 __ovld __cnfn ldexp(half2 x, int2 n); +@@ -7632,14 +7636,14 @@ float3 __ovld __cnfn lgamma(float3 x); + float4 __ovld __cnfn lgamma(float4 x); + float8 __ovld __cnfn lgamma(float8 x); + float16 __ovld __cnfn lgamma(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn lgamma(double x); + double2 __ovld __cnfn lgamma(double2 x); + double3 __ovld __cnfn lgamma(double3 x); + double4 __ovld __cnfn lgamma(double4 x); + double8 __ovld __cnfn lgamma(double8 x); + double16 __ovld __cnfn lgamma(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn lgamma(half x); + half2 __ovld __cnfn lgamma(half2 x); +@@ -7649,21 +7653,21 @@ half8 __ovld __cnfn lgamma(half8 x); + half16 __ovld __cnfn lgamma(half16 x); + #endif //cl_khr_fp16 + +-#if defined(__OPENCL_CPP_VERSION__) || 
(__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld lgamma_r(float x, int *signp); + float2 __ovld lgamma_r(float2 x, int2 *signp); + float3 __ovld lgamma_r(float3 x, int3 *signp); + float4 __ovld lgamma_r(float4 x, int4 *signp); + float8 __ovld lgamma_r(float8 x, int8 *signp); + float16 __ovld lgamma_r(float16 x, int16 *signp); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld lgamma_r(double x, int *signp); + double2 __ovld lgamma_r(double2 x, int2 *signp); + double3 __ovld lgamma_r(double3 x, int3 *signp); + double4 __ovld lgamma_r(double4 x, int4 *signp); + double8 __ovld lgamma_r(double8 x, int8 *signp); + double16 __ovld lgamma_r(double16 x, int16 *signp); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld lgamma_r(half x, int *signp); + half2 __ovld lgamma_r(half2 x, int2 *signp); +@@ -7672,7 +7676,9 @@ half4 __ovld lgamma_r(half4 x, int4 *signp); + half8 __ovld lgamma_r(half8 x, int8 *signp); + half16 __ovld lgamma_r(half16 x, int16 *signp); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld lgamma_r(float x, __global int *signp); + float2 __ovld lgamma_r(float2 x, __global int2 *signp); + float3 __ovld lgamma_r(float3 x, __global int3 *signp); +@@ -7691,7 +7697,7 @@ float3 __ovld lgamma_r(float3 x, __private int3 *signp); + float4 __ovld lgamma_r(float4 x, __private int4 *signp); + float8 __ovld lgamma_r(float8 x, __private int8 *signp); + float16 __ovld lgamma_r(float16 x, __private int16 *signp); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld lgamma_r(double x, __global int *signp); + double2 __ovld lgamma_r(double2 x, __global int2 *signp); + double3 __ovld lgamma_r(double3 x, __global int3 *signp); +@@ -7710,7 +7716,7 @@ double3 
__ovld lgamma_r(double3 x, __private int3 *signp); + double4 __ovld lgamma_r(double4 x, __private int4 *signp); + double8 __ovld lgamma_r(double8 x, __private int8 *signp); + double16 __ovld lgamma_r(double16 x, __private int16 *signp); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld lgamma_r(half x, __global int *signp); + half2 __ovld lgamma_r(half2 x, __global int2 *signp); +@@ -7731,8 +7737,8 @@ half4 __ovld lgamma_r(half4 x, __private int4 *signp); + half8 __ovld lgamma_r(half8 x, __private int8 *signp); + half16 __ovld lgamma_r(half16 x, __private int16 *signp); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +- ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + /** + * Compute natural logarithm. + */ +@@ -7742,14 +7748,14 @@ float3 __ovld __cnfn log(float3); + float4 __ovld __cnfn log(float4); + float8 __ovld __cnfn log(float8); + float16 __ovld __cnfn log(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn log(double); + double2 __ovld __cnfn log(double2); + double3 __ovld __cnfn log(double3); + double4 __ovld __cnfn log(double4); + double8 __ovld __cnfn log(double8); + double16 __ovld __cnfn log(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn log(half); + half2 __ovld __cnfn log(half2); +@@ -7760,7 +7766,7 @@ half16 __ovld __cnfn log(half16); + #endif //cl_khr_fp16 + + /** +- * Compute a base 2 logarithm. 
++ * Compute a base 2 logarithm + */ + float __ovld __cnfn log2(float); + float2 __ovld __cnfn log2(float2); +@@ -7768,14 +7774,14 @@ float3 __ovld __cnfn log2(float3); + float4 __ovld __cnfn log2(float4); + float8 __ovld __cnfn log2(float8); + float16 __ovld __cnfn log2(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn log2(double); + double2 __ovld __cnfn log2(double2); + double3 __ovld __cnfn log2(double3); + double4 __ovld __cnfn log2(double4); + double8 __ovld __cnfn log2(double8); + double16 __ovld __cnfn log2(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn log2(half); + half2 __ovld __cnfn log2(half2); +@@ -7794,14 +7800,14 @@ float3 __ovld __cnfn log10(float3); + float4 __ovld __cnfn log10(float4); + float8 __ovld __cnfn log10(float8); + float16 __ovld __cnfn log10(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn log10(double); + double2 __ovld __cnfn log10(double2); + double3 __ovld __cnfn log10(double3); + double4 __ovld __cnfn log10(double4); + double8 __ovld __cnfn log10(double8); + double16 __ovld __cnfn log10(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn log10(half); + half2 __ovld __cnfn log10(half2); +@@ -7820,14 +7826,14 @@ float3 __ovld __cnfn log1p(float3 x); + float4 __ovld __cnfn log1p(float4 x); + float8 __ovld __cnfn log1p(float8 x); + float16 __ovld __cnfn log1p(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn log1p(double x); + double2 __ovld __cnfn log1p(double2 x); + double3 __ovld __cnfn log1p(double3 x); + double4 __ovld __cnfn log1p(double4 x); + double8 __ovld __cnfn log1p(double8 x); + double16 __ovld __cnfn log1p(double16 x); +-#endif //cl_khr_fp64 ++#endif // 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn log1p(half x); + half2 __ovld __cnfn log1p(half2 x); +@@ -7847,14 +7853,14 @@ float3 __ovld __cnfn logb(float3 x); + float4 __ovld __cnfn logb(float4 x); + float8 __ovld __cnfn logb(float8 x); + float16 __ovld __cnfn logb(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn logb(double x); + double2 __ovld __cnfn logb(double2 x); + double3 __ovld __cnfn logb(double3 x); + double4 __ovld __cnfn logb(double4 x); + double8 __ovld __cnfn logb(double8 x); + double16 __ovld __cnfn logb(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn logb(half x); + half2 __ovld __cnfn logb(half2 x); +@@ -7877,14 +7883,14 @@ float3 __ovld __cnfn mad(float3 a, float3 b, float3 c); + float4 __ovld __cnfn mad(float4 a, float4 b, float4 c); + float8 __ovld __cnfn mad(float8 a, float8 b, float8 c); + float16 __ovld __cnfn mad(float16 a, float16 b, float16 c); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn mad(double a, double b, double c); + double2 __ovld __cnfn mad(double2 a, double2 b, double2 c); + double3 __ovld __cnfn mad(double3 a, double3 b, double3 c); + double4 __ovld __cnfn mad(double4 a, double4 b, double4 c); + double8 __ovld __cnfn mad(double8 a, double8 b, double8 c); + double16 __ovld __cnfn mad(double16 a, double16 b, double16 c); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn mad(half a, half b, half c); + half2 __ovld __cnfn mad(half2 a, half2 b, half2 c); +@@ -7904,14 +7910,14 @@ float3 __ovld __cnfn maxmag(float3 x, float3 y); + float4 __ovld __cnfn maxmag(float4 x, float4 y); + float8 __ovld __cnfn maxmag(float8 x, float8 y); + float16 __ovld __cnfn maxmag(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn maxmag(double x, double y); + double2 __ovld __cnfn maxmag(double2 x, double2 y); + double3 __ovld __cnfn maxmag(double3 x, double3 y); + double4 __ovld __cnfn maxmag(double4 x, double4 y); + double8 __ovld __cnfn maxmag(double8 x, double8 y); + double16 __ovld __cnfn maxmag(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn maxmag(half x, half y); + half2 __ovld __cnfn maxmag(half2 x, half2 y); +@@ -7931,14 +7937,14 @@ float3 __ovld __cnfn minmag(float3 x, float3 y); + float4 __ovld __cnfn minmag(float4 x, float4 y); + float8 __ovld __cnfn minmag(float8 x, float8 y); + float16 __ovld __cnfn minmag(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn minmag(double x, double y); + double2 __ovld __cnfn minmag(double2 x, double2 y); + double3 __ovld __cnfn minmag(double3 x, double3 y); + double4 __ovld __cnfn minmag(double4 x, double4 y); + double8 __ovld __cnfn minmag(double8 x, double8 y); + double16 __ovld __cnfn minmag(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn minmag(half x, half y); + half2 __ovld __cnfn minmag(half2 x, half2 y); +@@ -7955,21 +7961,21 @@ half16 __ovld __cnfn minmag(half16 x, half16 y); + * the argument. It stores the integral part in the object + * pointed to by iptr. 
+ */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld modf(float x, float *iptr); + float2 __ovld modf(float2 x, float2 *iptr); + float3 __ovld modf(float3 x, float3 *iptr); + float4 __ovld modf(float4 x, float4 *iptr); + float8 __ovld modf(float8 x, float8 *iptr); + float16 __ovld modf(float16 x, float16 *iptr); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld modf(double x, double *iptr); + double2 __ovld modf(double2 x, double2 *iptr); + double3 __ovld modf(double3 x, double3 *iptr); + double4 __ovld modf(double4 x, double4 *iptr); + double8 __ovld modf(double8 x, double8 *iptr); + double16 __ovld modf(double16 x, double16 *iptr); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld modf(half x, half *iptr); + half2 __ovld modf(half2 x, half2 *iptr); +@@ -7978,7 +7984,9 @@ half4 __ovld modf(half4 x, half4 *iptr); + half8 __ovld modf(half8 x, half8 *iptr); + half16 __ovld modf(half16 x, half16 *iptr); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld modf(float x, __global float *iptr); + float2 __ovld modf(float2 x, __global float2 *iptr); + float3 __ovld modf(float3 x, __global float3 *iptr); +@@ -7997,7 +8005,7 @@ float3 __ovld modf(float3 x, __private float3 *iptr); + float4 __ovld modf(float4 x, __private float4 *iptr); + float8 __ovld modf(float8 x, __private float8 *iptr); + float16 __ovld modf(float16 x, __private float16 *iptr); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld modf(double x, __global double *iptr); + double2 __ovld modf(double2 x, __global double2 *iptr); + double3 __ovld modf(double3 x, __global double3 *iptr); +@@ -8016,7 +8024,7 @@ double3 __ovld modf(double3 x, __private 
double3 *iptr); + double4 __ovld modf(double4 x, __private double4 *iptr); + double8 __ovld modf(double8 x, __private double8 *iptr); + double16 __ovld modf(double16 x, __private double16 *iptr); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld modf(half x, __global half *iptr); + half2 __ovld modf(half2 x, __global half2 *iptr); +@@ -8037,7 +8045,8 @@ half4 __ovld modf(half4 x, __private half4 *iptr); + half8 __ovld modf(half8 x, __private half8 *iptr); + half16 __ovld modf(half16 x, __private half16 *iptr); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + + /** + * Returns a quiet NaN. The nancode may be placed +@@ -8049,14 +8058,14 @@ float3 __ovld __cnfn nan(uint3 nancode); + float4 __ovld __cnfn nan(uint4 nancode); + float8 __ovld __cnfn nan(uint8 nancode); + float16 __ovld __cnfn nan(uint16 nancode); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn nan(ulong nancode); + double2 __ovld __cnfn nan(ulong2 nancode); + double3 __ovld __cnfn nan(ulong3 nancode); + double4 __ovld __cnfn nan(ulong4 nancode); + double8 __ovld __cnfn nan(ulong8 nancode); + double16 __ovld __cnfn nan(ulong16 nancode); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn nan(ushort nancode); + half2 __ovld __cnfn nan(ushort2 nancode); +@@ -8079,14 +8088,14 @@ float3 __ovld __cnfn nextafter(float3 x, float3 y); + float4 __ovld __cnfn nextafter(float4 x, float4 y); + float8 __ovld __cnfn nextafter(float8 x, float8 y); + float16 __ovld __cnfn nextafter(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn nextafter(double x, double y); + double2 __ovld __cnfn nextafter(double2 
x, double2 y); + double3 __ovld __cnfn nextafter(double3 x, double3 y); + double4 __ovld __cnfn nextafter(double4 x, double4 y); + double8 __ovld __cnfn nextafter(double8 x, double8 y); + double16 __ovld __cnfn nextafter(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn nextafter(half x, half y); + half2 __ovld __cnfn nextafter(half2 x, half2 y); +@@ -8105,14 +8114,14 @@ float3 __ovld __cnfn pow(float3 x, float3 y); + float4 __ovld __cnfn pow(float4 x, float4 y); + float8 __ovld __cnfn pow(float8 x, float8 y); + float16 __ovld __cnfn pow(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn pow(double x, double y); + double2 __ovld __cnfn pow(double2 x, double2 y); + double3 __ovld __cnfn pow(double3 x, double3 y); + double4 __ovld __cnfn pow(double4 x, double4 y); + double8 __ovld __cnfn pow(double8 x, double8 y); + double16 __ovld __cnfn pow(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn pow(half x, half y); + half2 __ovld __cnfn pow(half2 x, half2 y); +@@ -8131,14 +8140,14 @@ float3 __ovld __cnfn pown(float3 x, int3 y); + float4 __ovld __cnfn pown(float4 x, int4 y); + float8 __ovld __cnfn pown(float8 x, int8 y); + float16 __ovld __cnfn pown(float16 x, int16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn pown(double x, int y); + double2 __ovld __cnfn pown(double2 x, int2 y); + double3 __ovld __cnfn pown(double3 x, int3 y); + double4 __ovld __cnfn pown(double4 x, int4 y); + double8 __ovld __cnfn pown(double8 x, int8 y); + double16 __ovld __cnfn pown(double16 x, int16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn pown(half x, int y); + half2 __ovld __cnfn 
pown(half2 x, int2 y); +@@ -8157,14 +8166,14 @@ float3 __ovld __cnfn powr(float3 x, float3 y); + float4 __ovld __cnfn powr(float4 x, float4 y); + float8 __ovld __cnfn powr(float8 x, float8 y); + float16 __ovld __cnfn powr(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn powr(double x, double y); + double2 __ovld __cnfn powr(double2 x, double2 y); + double3 __ovld __cnfn powr(double3 x, double3 y); + double4 __ovld __cnfn powr(double4 x, double4 y); + double8 __ovld __cnfn powr(double8 x, double8 y); + double16 __ovld __cnfn powr(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn powr(half x, half y); + half2 __ovld __cnfn powr(half2 x, half2 y); +@@ -8186,14 +8195,14 @@ float3 __ovld __cnfn remainder(float3 x, float3 y); + float4 __ovld __cnfn remainder(float4 x, float4 y); + float8 __ovld __cnfn remainder(float8 x, float8 y); + float16 __ovld __cnfn remainder(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn remainder(double x, double y); + double2 __ovld __cnfn remainder(double2 x, double2 y); + double3 __ovld __cnfn remainder(double3 x, double3 y); + double4 __ovld __cnfn remainder(double4 x, double4 y); + double8 __ovld __cnfn remainder(double8 x, double8 y); + double16 __ovld __cnfn remainder(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn remainder(half x, half y); + half2 __ovld __cnfn remainder(half2 x, half2 y); +@@ -8215,21 +8224,21 @@ half16 __ovld __cnfn remainder(half16 x, half16 y); + * sign as x/y. It stores this signed value in the object + * pointed to by quo. 
+ */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld remquo(float x, float y, int *quo); + float2 __ovld remquo(float2 x, float2 y, int2 *quo); + float3 __ovld remquo(float3 x, float3 y, int3 *quo); + float4 __ovld remquo(float4 x, float4 y, int4 *quo); + float8 __ovld remquo(float8 x, float8 y, int8 *quo); + float16 __ovld remquo(float16 x, float16 y, int16 *quo); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld remquo(double x, double y, int *quo); + double2 __ovld remquo(double2 x, double2 y, int2 *quo); + double3 __ovld remquo(double3 x, double3 y, int3 *quo); + double4 __ovld remquo(double4 x, double4 y, int4 *quo); + double8 __ovld remquo(double8 x, double8 y, int8 *quo); + double16 __ovld remquo(double16 x, double16 y, int16 *quo); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld remquo(half x, half y, int *quo); + half2 __ovld remquo(half2 x, half2 y, int2 *quo); +@@ -8237,9 +8246,10 @@ half3 __ovld remquo(half3 x, half3 y, int3 *quo); + half4 __ovld remquo(half4 x, half4 y, int4 *quo); + half8 __ovld remquo(half8 x, half8 y, int8 *quo); + half16 __ovld remquo(half16 x, half16 y, int16 *quo); +- + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld remquo(float x, float y, __global int *quo); + float2 __ovld remquo(float2 x, float2 y, __global int2 *quo); + float3 __ovld remquo(float3 x, float3 y, __global int3 *quo); +@@ -8258,7 +8268,7 @@ float3 __ovld remquo(float3 x, float3 y, __private int3 *quo); + float4 __ovld remquo(float4 x, float4 y, __private int4 *quo); + float8 __ovld remquo(float8 x, float8 y, __private int8 *quo); + float16 __ovld remquo(float16 x, float16 y, __private int16 *quo); +-#ifdef cl_khr_fp64 ++#if 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld remquo(double x, double y, __global int *quo); + double2 __ovld remquo(double2 x, double2 y, __global int2 *quo); + double3 __ovld remquo(double3 x, double3 y, __global int3 *quo); +@@ -8277,7 +8287,7 @@ double3 __ovld remquo(double3 x, double3 y, __private int3 *quo); + double4 __ovld remquo(double4 x, double4 y, __private int4 *quo); + double8 __ovld remquo(double8 x, double8 y, __private int8 *quo); + double16 __ovld remquo(double16 x, double16 y, __private int16 *quo); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld remquo(half x, half y, __global int *quo); + half2 __ovld remquo(half2 x, half2 y, __global int2 *quo); +@@ -8298,7 +8308,8 @@ half4 __ovld remquo(half4 x, half4 y, __private int4 *quo); + half8 __ovld remquo(half8 x, half8 y, __private int8 *quo); + half16 __ovld remquo(half16 x, half16 y, __private int16 *quo); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + /** + * Round to integral value (using round to nearest + * even rounding mode) in floating-point format. 
+@@ -8311,14 +8322,14 @@ float3 __ovld __cnfn rint(float3); + float4 __ovld __cnfn rint(float4); + float8 __ovld __cnfn rint(float8); + float16 __ovld __cnfn rint(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn rint(double); + double2 __ovld __cnfn rint(double2); + double3 __ovld __cnfn rint(double3); + double4 __ovld __cnfn rint(double4); + double8 __ovld __cnfn rint(double8); + double16 __ovld __cnfn rint(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn rint(half); + half2 __ovld __cnfn rint(half2); +@@ -8337,14 +8348,14 @@ float3 __ovld __cnfn rootn(float3 x, int3 y); + float4 __ovld __cnfn rootn(float4 x, int4 y); + float8 __ovld __cnfn rootn(float8 x, int8 y); + float16 __ovld __cnfn rootn(float16 x, int16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn rootn(double x, int y); + double2 __ovld __cnfn rootn(double2 x, int2 y); + double3 __ovld __cnfn rootn(double3 x, int3 y); + double4 __ovld __cnfn rootn(double4 x, int4 y); + double8 __ovld __cnfn rootn(double8 x, int8 y); + double16 __ovld __cnfn rootn(double16 x, int16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn rootn(half x, int y); + half2 __ovld __cnfn rootn(half2 x, int2 y); +@@ -8365,14 +8376,14 @@ float3 __ovld __cnfn round(float3 x); + float4 __ovld __cnfn round(float4 x); + float8 __ovld __cnfn round(float8 x); + float16 __ovld __cnfn round(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn round(double x); + double2 __ovld __cnfn round(double2 x); + double3 __ovld __cnfn round(double3 x); + double4 __ovld __cnfn round(double4 x); + double8 __ovld __cnfn round(double8 x); + double16 __ovld __cnfn round(double16 x); +-#endif //cl_khr_fp64 ++#endif // 
defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn round(half x); + half2 __ovld __cnfn round(half2 x); +@@ -8391,14 +8402,14 @@ float3 __ovld __cnfn rsqrt(float3); + float4 __ovld __cnfn rsqrt(float4); + float8 __ovld __cnfn rsqrt(float8); + float16 __ovld __cnfn rsqrt(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn rsqrt(double); + double2 __ovld __cnfn rsqrt(double2); + double3 __ovld __cnfn rsqrt(double3); + double4 __ovld __cnfn rsqrt(double4); + double8 __ovld __cnfn rsqrt(double8); + double16 __ovld __cnfn rsqrt(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn rsqrt(half); + half2 __ovld __cnfn rsqrt(half2); +@@ -8417,14 +8428,14 @@ float3 __ovld __cnfn sin(float3); + float4 __ovld __cnfn sin(float4); + float8 __ovld __cnfn sin(float8); + float16 __ovld __cnfn sin(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sin(double); + double2 __ovld __cnfn sin(double2); + double3 __ovld __cnfn sin(double3); + double4 __ovld __cnfn sin(double4); + double8 __ovld __cnfn sin(double8); + double16 __ovld __cnfn sin(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sin(half); + half2 __ovld __cnfn sin(half2); +@@ -8439,21 +8450,21 @@ half16 __ovld __cnfn sin(half16); + * is the return value and computed cosine is returned + * in cosval. 
+ */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld sincos(float x, float *cosval); + float2 __ovld sincos(float2 x, float2 *cosval); + float3 __ovld sincos(float3 x, float3 *cosval); + float4 __ovld sincos(float4 x, float4 *cosval); + float8 __ovld sincos(float8 x, float8 *cosval); + float16 __ovld sincos(float16 x, float16 *cosval); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld sincos(double x, double *cosval); + double2 __ovld sincos(double2 x, double2 *cosval); + double3 __ovld sincos(double3 x, double3 *cosval); + double4 __ovld sincos(double4 x, double4 *cosval); + double8 __ovld sincos(double8 x, double8 *cosval); + double16 __ovld sincos(double16 x, double16 *cosval); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld sincos(half x, half *cosval); + half2 __ovld sincos(half2 x, half2 *cosval); +@@ -8462,7 +8473,9 @@ half4 __ovld sincos(half4 x, half4 *cosval); + half8 __ovld sincos(half8 x, half8 *cosval); + half16 __ovld sincos(half16 x, half16 *cosval); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space ++ ++#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) + float __ovld sincos(float x, __global float *cosval); + float2 __ovld sincos(float2 x, __global float2 *cosval); + float3 __ovld sincos(float3 x, __global float3 *cosval); +@@ -8481,7 +8494,7 @@ float3 __ovld sincos(float3 x, __private float3 *cosval); + float4 __ovld sincos(float4 x, __private float4 *cosval); + float8 __ovld sincos(float8 x, __private float8 *cosval); + float16 __ovld sincos(float16 x, __private float16 *cosval); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld sincos(double x, __global double *cosval); + double2 __ovld sincos(double2 x, __global double2 *cosval); + double3 __ovld 
sincos(double3 x, __global double3 *cosval); +@@ -8500,7 +8513,7 @@ double3 __ovld sincos(double3 x, __private double3 *cosval); + double4 __ovld sincos(double4 x, __private double4 *cosval); + double8 __ovld sincos(double8 x, __private double8 *cosval); + double16 __ovld sincos(double16 x, __private double16 *cosval); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld sincos(half x, __global half *cosval); + half2 __ovld sincos(half2 x, __global half2 *cosval); +@@ -8521,8 +8534,8 @@ half4 __ovld sincos(half4 x, __private half4 *cosval); + half8 __ovld sincos(half8 x, __private half8 *cosval); + half16 __ovld sincos(half16 x, __private half16 *cosval); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +- ++#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != ++ //! CL_VERSION_2_0) + /** + * Compute hyperbolic sine. + */ +@@ -8532,14 +8545,14 @@ float3 __ovld __cnfn sinh(float3); + float4 __ovld __cnfn sinh(float4); + float8 __ovld __cnfn sinh(float8); + float16 __ovld __cnfn sinh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sinh(double); + double2 __ovld __cnfn sinh(double2); + double3 __ovld __cnfn sinh(double3); + double4 __ovld __cnfn sinh(double4); + double8 __ovld __cnfn sinh(double8); + double16 __ovld __cnfn sinh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sinh(half); + half2 __ovld __cnfn sinh(half2); +@@ -8558,14 +8571,14 @@ float3 __ovld __cnfn sinpi(float3 x); + float4 __ovld __cnfn sinpi(float4 x); + float8 __ovld __cnfn sinpi(float8 x); + float16 __ovld __cnfn sinpi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sinpi(double x); + double2 __ovld __cnfn sinpi(double2 x); + double3 __ovld 
__cnfn sinpi(double3 x); + double4 __ovld __cnfn sinpi(double4 x); + double8 __ovld __cnfn sinpi(double8 x); + double16 __ovld __cnfn sinpi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sinpi(half x); + half2 __ovld __cnfn sinpi(half2 x); +@@ -8584,14 +8597,14 @@ float3 __ovld __cnfn sqrt(float3); + float4 __ovld __cnfn sqrt(float4); + float8 __ovld __cnfn sqrt(float8); + float16 __ovld __cnfn sqrt(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sqrt(double); + double2 __ovld __cnfn sqrt(double2); + double3 __ovld __cnfn sqrt(double3); + double4 __ovld __cnfn sqrt(double4); + double8 __ovld __cnfn sqrt(double8); + double16 __ovld __cnfn sqrt(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sqrt(half); + half2 __ovld __cnfn sqrt(half2); +@@ -8610,14 +8623,14 @@ float3 __ovld __cnfn tan(float3); + float4 __ovld __cnfn tan(float4); + float8 __ovld __cnfn tan(float8); + float16 __ovld __cnfn tan(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn tan(double); + double2 __ovld __cnfn tan(double2); + double3 __ovld __cnfn tan(double3); + double4 __ovld __cnfn tan(double4); + double8 __ovld __cnfn tan(double8); + double16 __ovld __cnfn tan(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn tan(half); + half2 __ovld __cnfn tan(half2); +@@ -8636,14 +8649,14 @@ float3 __ovld __cnfn tanh(float3); + float4 __ovld __cnfn tanh(float4); + float8 __ovld __cnfn tanh(float8); + float16 __ovld __cnfn tanh(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn tanh(double); + double2 __ovld __cnfn tanh(double2); + double3 __ovld __cnfn tanh(double3); + 
double4 __ovld __cnfn tanh(double4); + double8 __ovld __cnfn tanh(double8); + double16 __ovld __cnfn tanh(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn tanh(half); + half2 __ovld __cnfn tanh(half2); +@@ -8662,14 +8675,14 @@ float3 __ovld __cnfn tanpi(float3 x); + float4 __ovld __cnfn tanpi(float4 x); + float8 __ovld __cnfn tanpi(float8 x); + float16 __ovld __cnfn tanpi(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn tanpi(double x); + double2 __ovld __cnfn tanpi(double2 x); + double3 __ovld __cnfn tanpi(double3 x); + double4 __ovld __cnfn tanpi(double4 x); + double8 __ovld __cnfn tanpi(double8 x); + double16 __ovld __cnfn tanpi(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn tanpi(half x); + half2 __ovld __cnfn tanpi(half2 x); +@@ -8688,14 +8701,14 @@ float3 __ovld __cnfn tgamma(float3); + float4 __ovld __cnfn tgamma(float4); + float8 __ovld __cnfn tgamma(float8); + float16 __ovld __cnfn tgamma(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn tgamma(double); + double2 __ovld __cnfn tgamma(double2); + double3 __ovld __cnfn tgamma(double3); + double4 __ovld __cnfn tgamma(double4); + double8 __ovld __cnfn tgamma(double8); + double16 __ovld __cnfn tgamma(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn tgamma(half); + half2 __ovld __cnfn tgamma(half2); +@@ -8715,14 +8728,14 @@ float3 __ovld __cnfn trunc(float3); + float4 __ovld __cnfn trunc(float4); + float8 __ovld __cnfn trunc(float8); + float16 __ovld __cnfn trunc(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn trunc(double); + double2 __ovld __cnfn trunc(double2); + 
double3 __ovld __cnfn trunc(double3); + double4 __ovld __cnfn trunc(double4); + double8 __ovld __cnfn trunc(double8); + double16 __ovld __cnfn trunc(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn trunc(half); + half2 __ovld __cnfn trunc(half2); +@@ -10108,7 +10121,7 @@ float3 __ovld __cnfn clamp(float3 x, float minval, float maxval); + float4 __ovld __cnfn clamp(float4 x, float minval, float maxval); + float8 __ovld __cnfn clamp(float8 x, float minval, float maxval); + float16 __ovld __cnfn clamp(float16 x, float minval, float maxval); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn clamp(double x, double minval, double maxval); + double2 __ovld __cnfn clamp(double2 x, double2 minval, double2 maxval); + double3 __ovld __cnfn clamp(double3 x, double3 minval, double3 maxval); +@@ -10120,7 +10133,7 @@ double3 __ovld __cnfn clamp(double3 x, double minval, double maxval); + double4 __ovld __cnfn clamp(double4 x, double minval, double maxval); + double8 __ovld __cnfn clamp(double8 x, double minval, double maxval); + double16 __ovld __cnfn clamp(double16 x, double minval, double maxval); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn clamp(half x, half minval, half maxval); + half2 __ovld __cnfn clamp(half2 x, half2 minval, half2 maxval); +@@ -10145,14 +10158,14 @@ float3 __ovld __cnfn degrees(float3 radians); + float4 __ovld __cnfn degrees(float4 radians); + float8 __ovld __cnfn degrees(float8 radians); + float16 __ovld __cnfn degrees(float16 radians); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn degrees(double radians); + double2 __ovld __cnfn degrees(double2 radians); + double3 __ovld __cnfn degrees(double3 radians); + double4 __ovld __cnfn degrees(double4 radians); + double8 __ovld __cnfn 
degrees(double8 radians); + double16 __ovld __cnfn degrees(double16 radians); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn degrees(half radians); + half2 __ovld __cnfn degrees(half2 radians); +@@ -10177,7 +10190,7 @@ float3 __ovld __cnfn max(float3 x, float y); + float4 __ovld __cnfn max(float4 x, float y); + float8 __ovld __cnfn max(float8 x, float y); + float16 __ovld __cnfn max(float16 x, float y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn max(double x, double y); + double2 __ovld __cnfn max(double2 x, double2 y); + double3 __ovld __cnfn max(double3 x, double3 y); +@@ -10189,7 +10202,7 @@ double3 __ovld __cnfn max(double3 x, double y); + double4 __ovld __cnfn max(double4 x, double y); + double8 __ovld __cnfn max(double8 x, double y); + double16 __ovld __cnfn max(double16 x, double y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn max(half x, half y); + half2 __ovld __cnfn max(half2 x, half2 y); +@@ -10219,7 +10232,7 @@ float3 __ovld __cnfn min(float3 x, float y); + float4 __ovld __cnfn min(float4 x, float y); + float8 __ovld __cnfn min(float8 x, float y); + float16 __ovld __cnfn min(float16 x, float y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn min(double x, double y); + double2 __ovld __cnfn min(double2 x, double2 y); + double3 __ovld __cnfn min(double3 x, double3 y); +@@ -10231,7 +10244,7 @@ double3 __ovld __cnfn min(double3 x, double y); + double4 __ovld __cnfn min(double4 x, double y); + double8 __ovld __cnfn min(double8 x, double y); + double16 __ovld __cnfn min(double16 x, double y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn min(half x, half y); + half2 __ovld __cnfn min(half2 x, half2 y); +@@ -10264,7 
+10277,7 @@ float3 __ovld __cnfn mix(float3 x, float3 y, float a); + float4 __ovld __cnfn mix(float4 x, float4 y, float a); + float8 __ovld __cnfn mix(float8 x, float8 y, float a); + float16 __ovld __cnfn mix(float16 x, float16 y, float a); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn mix(double x, double y, double a); + double2 __ovld __cnfn mix(double2 x, double2 y, double2 a); + double3 __ovld __cnfn mix(double3 x, double3 y, double3 a); +@@ -10276,7 +10289,7 @@ double3 __ovld __cnfn mix(double3 x, double3 y, double a); + double4 __ovld __cnfn mix(double4 x, double4 y, double a); + double8 __ovld __cnfn mix(double8 x, double8 y, double a); + double16 __ovld __cnfn mix(double16 x, double16 y, double a); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn mix(half x, half y, half a); + half2 __ovld __cnfn mix(half2 x, half2 y, half2 a); +@@ -10301,14 +10314,14 @@ float3 __ovld __cnfn radians(float3 degrees); + float4 __ovld __cnfn radians(float4 degrees); + float8 __ovld __cnfn radians(float8 degrees); + float16 __ovld __cnfn radians(float16 degrees); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn radians(double degrees); + double2 __ovld __cnfn radians(double2 degrees); + double3 __ovld __cnfn radians(double3 degrees); + double4 __ovld __cnfn radians(double4 degrees); + double8 __ovld __cnfn radians(double8 degrees); + double16 __ovld __cnfn radians(double16 degrees); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn radians(half degrees); + half2 __ovld __cnfn radians(half2 degrees); +@@ -10332,7 +10345,7 @@ float3 __ovld __cnfn step(float edge, float3 x); + float4 __ovld __cnfn step(float edge, float4 x); + float8 __ovld __cnfn step(float edge, float8 x); + float16 __ovld __cnfn step(float edge, float16 x); 
+-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn step(double edge, double x); + double2 __ovld __cnfn step(double2 edge, double2 x); + double3 __ovld __cnfn step(double3 edge, double3 x); +@@ -10344,7 +10357,7 @@ double3 __ovld __cnfn step(double edge, double3 x); + double4 __ovld __cnfn step(double edge, double4 x); + double8 __ovld __cnfn step(double edge, double8 x); + double16 __ovld __cnfn step(double edge, double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn step(half edge, half x); + half2 __ovld __cnfn step(half2 edge, half2 x); +@@ -10383,7 +10396,7 @@ float3 __ovld __cnfn smoothstep(float edge0, float edge1, float3 x); + float4 __ovld __cnfn smoothstep(float edge0, float edge1, float4 x); + float8 __ovld __cnfn smoothstep(float edge0, float edge1, float8 x); + float16 __ovld __cnfn smoothstep(float edge0, float edge1, float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn smoothstep(double edge0, double edge1, double x); + double2 __ovld __cnfn smoothstep(double2 edge0, double2 edge1, double2 x); + double3 __ovld __cnfn smoothstep(double3 edge0, double3 edge1, double3 x); +@@ -10395,7 +10408,7 @@ double3 __ovld __cnfn smoothstep(double edge0, double edge1, double3 x); + double4 __ovld __cnfn smoothstep(double edge0, double edge1, double4 x); + double8 __ovld __cnfn smoothstep(double edge0, double edge1, double8 x); + double16 __ovld __cnfn smoothstep(double edge0, double edge1, double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn smoothstep(half edge0, half edge1, half x); + half2 __ovld __cnfn smoothstep(half2 edge0, half2 edge1, half2 x); +@@ -10420,14 +10433,14 @@ float3 __ovld __cnfn sign(float3 x); + float4 __ovld __cnfn sign(float4 x); + float8 __ovld __cnfn sign(float8 x); + 
float16 __ovld __cnfn sign(float16 x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn sign(double x); + double2 __ovld __cnfn sign(double2 x); + double3 __ovld __cnfn sign(double3 x); + double4 __ovld __cnfn sign(double4 x); + double8 __ovld __cnfn sign(double8 x); + double16 __ovld __cnfn sign(double16 x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn sign(half x); + half2 __ovld __cnfn sign(half2 x); +@@ -10445,10 +10458,10 @@ half16 __ovld __cnfn sign(half16 x); + */ + float4 __ovld __cnfn cross(float4 p0, float4 p1); + float3 __ovld __cnfn cross(float3 p0, float3 p1); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double4 __ovld __cnfn cross(double4 p0, double4 p1); + double3 __ovld __cnfn cross(double3 p0, double3 p1); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half4 __ovld __cnfn cross(half4 p0, half4 p1); + half3 __ovld __cnfn cross(half3 p0, half3 p1); +@@ -10461,12 +10474,12 @@ float __ovld __cnfn dot(float p0, float p1); + float __ovld __cnfn dot(float2 p0, float2 p1); + float __ovld __cnfn dot(float3 p0, float3 p1); + float __ovld __cnfn dot(float4 p0, float4 p1); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn dot(double p0, double p1); + double __ovld __cnfn dot(double2 p0, double2 p1); + double __ovld __cnfn dot(double3 p0, double3 p1); + double __ovld __cnfn dot(double4 p0, double4 p1); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn dot(half p0, half p1); + half __ovld __cnfn dot(half2 p0, half2 p1); +@@ -10482,12 +10495,12 @@ float __ovld __cnfn distance(float p0, float p1); + float __ovld __cnfn distance(float2 p0, float2 p1); + float __ovld __cnfn distance(float3 p0, float3 p1); + float __ovld 
__cnfn distance(float4 p0, float4 p1); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn distance(double p0, double p1); + double __ovld __cnfn distance(double2 p0, double2 p1); + double __ovld __cnfn distance(double3 p0, double3 p1); + double __ovld __cnfn distance(double4 p0, double4 p1); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn distance(half p0, half p1); + half __ovld __cnfn distance(half2 p0, half2 p1); +@@ -10503,12 +10516,12 @@ float __ovld __cnfn length(float p); + float __ovld __cnfn length(float2 p); + float __ovld __cnfn length(float3 p); + float __ovld __cnfn length(float4 p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn length(double p); + double __ovld __cnfn length(double2 p); + double __ovld __cnfn length(double3 p); + double __ovld __cnfn length(double4 p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn length(half p); + half __ovld __cnfn length(half2 p); +@@ -10524,12 +10537,12 @@ float __ovld __cnfn normalize(float p); + float2 __ovld __cnfn normalize(float2 p); + float3 __ovld __cnfn normalize(float3 p); + float4 __ovld __cnfn normalize(float4 p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn normalize(double p); + double2 __ovld __cnfn normalize(double2 p); + double3 __ovld __cnfn normalize(double3 p); + double4 __ovld __cnfn normalize(double4 p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn normalize(half p); + half2 __ovld __cnfn normalize(half2 p); +@@ -10610,14 +10623,14 @@ int3 __ovld __cnfn isequal(float3 x, float3 y); + int4 __ovld __cnfn isequal(float4 x, float4 y); + int8 __ovld __cnfn isequal(float8 x, float8 y); + int16 __ovld __cnfn 
isequal(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isequal(double x, double y); + long2 __ovld __cnfn isequal(double2 x, double2 y); + long3 __ovld __cnfn isequal(double3 x, double3 y); + long4 __ovld __cnfn isequal(double4 x, double4 y); + long8 __ovld __cnfn isequal(double8 x, double8 y); + long16 __ovld __cnfn isequal(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isequal(half x, half y); + short2 __ovld __cnfn isequal(half2 x, half2 y); +@@ -10636,14 +10649,14 @@ int3 __ovld __cnfn isnotequal(float3 x, float3 y); + int4 __ovld __cnfn isnotequal(float4 x, float4 y); + int8 __ovld __cnfn isnotequal(float8 x, float8 y); + int16 __ovld __cnfn isnotequal(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isnotequal(double x, double y); + long2 __ovld __cnfn isnotequal(double2 x, double2 y); + long3 __ovld __cnfn isnotequal(double3 x, double3 y); + long4 __ovld __cnfn isnotequal(double4 x, double4 y); + long8 __ovld __cnfn isnotequal(double8 x, double8 y); + long16 __ovld __cnfn isnotequal(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isnotequal(half x, half y); + short2 __ovld __cnfn isnotequal(half2 x, half2 y); +@@ -10662,14 +10675,14 @@ int3 __ovld __cnfn isgreater(float3 x, float3 y); + int4 __ovld __cnfn isgreater(float4 x, float4 y); + int8 __ovld __cnfn isgreater(float8 x, float8 y); + int16 __ovld __cnfn isgreater(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isgreater(double x, double y); + long2 __ovld __cnfn isgreater(double2 x, double2 y); + long3 __ovld __cnfn isgreater(double3 x, double3 y); + long4 __ovld __cnfn isgreater(double4 x, 
double4 y); + long8 __ovld __cnfn isgreater(double8 x, double8 y); + long16 __ovld __cnfn isgreater(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isgreater(half x, half y); + short2 __ovld __cnfn isgreater(half2 x, half2 y); +@@ -10688,14 +10701,14 @@ int3 __ovld __cnfn isgreaterequal(float3 x, float3 y); + int4 __ovld __cnfn isgreaterequal(float4 x, float4 y); + int8 __ovld __cnfn isgreaterequal(float8 x, float8 y); + int16 __ovld __cnfn isgreaterequal(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isgreaterequal(double x, double y); + long2 __ovld __cnfn isgreaterequal(double2 x, double2 y); + long3 __ovld __cnfn isgreaterequal(double3 x, double3 y); + long4 __ovld __cnfn isgreaterequal(double4 x, double4 y); + long8 __ovld __cnfn isgreaterequal(double8 x, double8 y); + long16 __ovld __cnfn isgreaterequal(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isgreaterequal(half x, half y); + short2 __ovld __cnfn isgreaterequal(half2 x, half2 y); +@@ -10714,14 +10727,14 @@ int3 __ovld __cnfn isless(float3 x, float3 y); + int4 __ovld __cnfn isless(float4 x, float4 y); + int8 __ovld __cnfn isless(float8 x, float8 y); + int16 __ovld __cnfn isless(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isless(double x, double y); + long2 __ovld __cnfn isless(double2 x, double2 y); + long3 __ovld __cnfn isless(double3 x, double3 y); + long4 __ovld __cnfn isless(double4 x, double4 y); + long8 __ovld __cnfn isless(double8 x, double8 y); + long16 __ovld __cnfn isless(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isless(half x, half y); + 
short2 __ovld __cnfn isless(half2 x, half2 y); +@@ -10740,14 +10753,14 @@ int3 __ovld __cnfn islessequal(float3 x, float3 y); + int4 __ovld __cnfn islessequal(float4 x, float4 y); + int8 __ovld __cnfn islessequal(float8 x, float8 y); + int16 __ovld __cnfn islessequal(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn islessequal(double x, double y); + long2 __ovld __cnfn islessequal(double2 x, double2 y); + long3 __ovld __cnfn islessequal(double3 x, double3 y); + long4 __ovld __cnfn islessequal(double4 x, double4 y); + long8 __ovld __cnfn islessequal(double8 x, double8 y); + long16 __ovld __cnfn islessequal(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn islessequal(half x, half y); + short2 __ovld __cnfn islessequal(half2 x, half2 y); +@@ -10767,14 +10780,14 @@ int3 __ovld __cnfn islessgreater(float3 x, float3 y); + int4 __ovld __cnfn islessgreater(float4 x, float4 y); + int8 __ovld __cnfn islessgreater(float8 x, float8 y); + int16 __ovld __cnfn islessgreater(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn islessgreater(double x, double y); + long2 __ovld __cnfn islessgreater(double2 x, double2 y); + long3 __ovld __cnfn islessgreater(double3 x, double3 y); + long4 __ovld __cnfn islessgreater(double4 x, double4 y); + long8 __ovld __cnfn islessgreater(double8 x, double8 y); + long16 __ovld __cnfn islessgreater(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn islessgreater(half x, half y); + short2 __ovld __cnfn islessgreater(half2 x, half2 y); +@@ -10793,14 +10806,14 @@ int3 __ovld __cnfn isfinite(float3); + int4 __ovld __cnfn isfinite(float4); + int8 __ovld __cnfn isfinite(float8); + int16 __ovld __cnfn isfinite(float16); 
+-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isfinite(double); + long2 __ovld __cnfn isfinite(double2); + long3 __ovld __cnfn isfinite(double3); + long4 __ovld __cnfn isfinite(double4); + long8 __ovld __cnfn isfinite(double8); + long16 __ovld __cnfn isfinite(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isfinite(half); + short2 __ovld __cnfn isfinite(half2); +@@ -10819,14 +10832,14 @@ int3 __ovld __cnfn isinf(float3); + int4 __ovld __cnfn isinf(float4); + int8 __ovld __cnfn isinf(float8); + int16 __ovld __cnfn isinf(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isinf(double); + long2 __ovld __cnfn isinf(double2); + long3 __ovld __cnfn isinf(double3); + long4 __ovld __cnfn isinf(double4); + long8 __ovld __cnfn isinf(double8); + long16 __ovld __cnfn isinf(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isinf(half); + short2 __ovld __cnfn isinf(half2); +@@ -10845,14 +10858,14 @@ int3 __ovld __cnfn isnan(float3); + int4 __ovld __cnfn isnan(float4); + int8 __ovld __cnfn isnan(float8); + int16 __ovld __cnfn isnan(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isnan(double); + long2 __ovld __cnfn isnan(double2); + long3 __ovld __cnfn isnan(double3); + long4 __ovld __cnfn isnan(double4); + long8 __ovld __cnfn isnan(double8); + long16 __ovld __cnfn isnan(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isnan(half); + short2 __ovld __cnfn isnan(half2); +@@ -10871,14 +10884,14 @@ int3 __ovld __cnfn isnormal(float3); + int4 __ovld __cnfn isnormal(float4); + int8 __ovld __cnfn isnormal(float8); + int16 __ovld __cnfn isnormal(float16); +-#ifdef 
cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isnormal(double); + long2 __ovld __cnfn isnormal(double2); + long3 __ovld __cnfn isnormal(double3); + long4 __ovld __cnfn isnormal(double4); + long8 __ovld __cnfn isnormal(double8); + long16 __ovld __cnfn isnormal(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isnormal(half); + short2 __ovld __cnfn isnormal(half2); +@@ -10899,14 +10912,14 @@ int3 __ovld __cnfn isordered(float3 x, float3 y); + int4 __ovld __cnfn isordered(float4 x, float4 y); + int8 __ovld __cnfn isordered(float8 x, float8 y); + int16 __ovld __cnfn isordered(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isordered(double x, double y); + long2 __ovld __cnfn isordered(double2 x, double2 y); + long3 __ovld __cnfn isordered(double3 x, double3 y); + long4 __ovld __cnfn isordered(double4 x, double4 y); + long8 __ovld __cnfn isordered(double8 x, double8 y); + long16 __ovld __cnfn isordered(double16 x, double16 y); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isordered(half x, half y); + short2 __ovld __cnfn isordered(half2 x, half2 y); +@@ -10927,14 +10940,14 @@ int3 __ovld __cnfn isunordered(float3 x, float3 y); + int4 __ovld __cnfn isunordered(float4 x, float4 y); + int8 __ovld __cnfn isunordered(float8 x, float8 y); + int16 __ovld __cnfn isunordered(float16 x, float16 y); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn isunordered(double x, double y); + long2 __ovld __cnfn isunordered(double2 x, double2 y); + long3 __ovld __cnfn isunordered(double3 x, double3 y); + long4 __ovld __cnfn isunordered(double4 x, double4 y); + long8 __ovld __cnfn isunordered(double8 x, double8 y); + long16 __ovld __cnfn isunordered(double16 x, double16 y); 
+-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn isunordered(half x, half y); + short2 __ovld __cnfn isunordered(half2 x, half2 y); +@@ -10957,14 +10970,14 @@ int3 __ovld __cnfn signbit(float3); + int4 __ovld __cnfn signbit(float4); + int8 __ovld __cnfn signbit(float8); + int16 __ovld __cnfn signbit(float16); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + int __ovld __cnfn signbit(double); + long2 __ovld __cnfn signbit(double2); + long3 __ovld __cnfn signbit(double3); + long4 __ovld __cnfn signbit(double4); + long8 __ovld __cnfn signbit(double8); + long16 __ovld __cnfn signbit(double16); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + int __ovld __cnfn signbit(half); + short2 __ovld __cnfn signbit(half2); +@@ -11091,14 +11104,14 @@ float3 __ovld __cnfn bitselect(float3 a, float3 b, float3 c); + float4 __ovld __cnfn bitselect(float4 a, float4 b, float4 c); + float8 __ovld __cnfn bitselect(float8 a, float8 b, float8 c); + float16 __ovld __cnfn bitselect(float16 a, float16 b, float16 c); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn bitselect(double a, double b, double c); + double2 __ovld __cnfn bitselect(double2 a, double2 b, double2 c); + double3 __ovld __cnfn bitselect(double3 a, double3 b, double3 c); + double4 __ovld __cnfn bitselect(double4 a, double4 b, double4 c); + double8 __ovld __cnfn bitselect(double8 a, double8 b, double8 c); + double16 __ovld __cnfn bitselect(double16 a, double16 b, double16 c); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn bitselect(half a, half b, half c); + half2 __ovld __cnfn bitselect(half2 a, half2 b, half2 c); +@@ -11231,7 +11244,7 @@ ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c); + long16 __ovld __cnfn select(long16 a, 
long16 b, ulong16 c); + ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __cnfn select(double a, double b, long c); + double2 __ovld __cnfn select(double2 a, double2 b, long2 c); + double3 __ovld __cnfn select(double3 a, double3 b, long3 c); +@@ -11244,7 +11257,7 @@ double3 __ovld __cnfn select(double3 a, double3 b, ulong3 c); + double4 __ovld __cnfn select(double4 a, double4 b, ulong4 c); + double8 __ovld __cnfn select(double8 a, double8 b, ulong8 c); + double16 __ovld __cnfn select(double16 a, double16 b, ulong16 c); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + half __ovld __cnfn select(half a, half b, short c); + half2 __ovld __cnfn select(half2 a, half2 b, short2 c); +@@ -11323,13 +11336,13 @@ uint16 __ovld vload16(size_t offset, const __constant uint *p); + long16 __ovld vload16(size_t offset, const __constant long *p); + ulong16 __ovld vload16(size_t offset, const __constant ulong *p); + float16 __ovld vload16(size_t offset, const __constant float *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld vload2(size_t offset, const __constant double *p); + double3 __ovld vload3(size_t offset, const __constant double *p); + double4 __ovld vload4(size_t offset, const __constant double *p); + double8 __ovld vload8(size_t offset, const __constant double *p); + double16 __ovld vload16(size_t offset, const __constant double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half __ovld vload(size_t offset, const __constant half *p); +@@ -11340,7 +11353,7 @@ half8 __ovld vload8(size_t offset, const __constant half *p); + half16 __ovld vload16(size_t offset, const __constant half *p); + #endif //cl_khr_fp16 + +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef 
__opencl_c_generic_address_space + char2 __ovld vload2(size_t offset, const char *p); + uchar2 __ovld vload2(size_t offset, const uchar *p); + short2 __ovld vload2(size_t offset, const short *p); +@@ -11387,13 +11400,13 @@ long16 __ovld vload16(size_t offset, const long *p); + ulong16 __ovld vload16(size_t offset, const ulong *p); + float16 __ovld vload16(size_t offset, const float *p); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld vload2(size_t offset, const double *p); + double3 __ovld vload3(size_t offset, const double *p); + double4 __ovld vload4(size_t offset, const double *p); + double8 __ovld vload8(size_t offset, const double *p); + double16 __ovld vload16(size_t offset, const double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half __ovld vload(size_t offset, const half *p); +@@ -11403,7 +11416,7 @@ half4 __ovld vload4(size_t offset, const half *p); + half8 __ovld vload8(size_t offset, const half *p); + half16 __ovld vload16(size_t offset, const half *p); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space + char2 __ovld vload2(size_t offset, const __global char *p); + uchar2 __ovld vload2(size_t offset, const __global uchar *p); + short2 __ovld vload2(size_t offset, const __global short *p); +@@ -11540,7 +11553,7 @@ long16 __ovld vload16(size_t offset, const __private long *p); + ulong16 __ovld vload16(size_t offset, const __private ulong *p); + float16 __ovld vload16(size_t offset, const __private float *p); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld vload2(size_t offset, const __global double *p); + double3 __ovld vload3(size_t offset, const __global double *p); + double4 __ovld vload4(size_t offset, const __global double *p); +@@ -11556,7 +11569,7 @@ double3 __ovld vload3(size_t offset, const __private double *p); + double4 __ovld vload4(size_t offset, const 
__private double *p); + double8 __ovld vload8(size_t offset, const __private double *p); + double16 __ovld vload16(size_t offset, const __private double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half __ovld vload(size_t offset, const __global half *p); +@@ -11578,9 +11591,8 @@ half4 __ovld vload4(size_t offset, const __private half *p); + half8 __ovld vload8(size_t offset, const __private half *p); + half16 __ovld vload16(size_t offset, const __private half *p); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + void __ovld vstore2(char2 data, size_t offset, char *p); + void __ovld vstore2(uchar2 data, size_t offset, uchar *p); + void __ovld vstore2(short2 data, size_t offset, short *p); +@@ -11626,13 +11638,13 @@ void __ovld vstore16(uint16 data, size_t offset, uint *p); + void __ovld vstore16(long16 data, size_t offset, long *p); + void __ovld vstore16(ulong16 data, size_t offset, ulong *p); + void __ovld vstore16(float16 data, size_t offset, float *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore2(double2 data, size_t offset, double *p); + void __ovld vstore3(double3 data, size_t offset, double *p); + void __ovld vstore4(double4 data, size_t offset, double *p); + void __ovld vstore8(double8 data, size_t offset, double *p); + void __ovld vstore16(double16 data, size_t offset, double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + void __ovld vstore(half data, size_t offset, half *p); + void __ovld vstore2(half2 data, size_t offset, half *p); +@@ -11641,7 +11653,7 @@ void __ovld vstore4(half4 data, size_t offset, half *p); + void __ovld vstore8(half8 data, size_t offset, half *p); + void __ovld 
vstore16(half16 data, size_t offset, half *p); + #endif //cl_khr_fp16 +-#else ++#endif //__opencl_c_generic_address_space + void __ovld vstore2(char2 data, size_t offset, __global char *p); + void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p); + void __ovld vstore2(short2 data, size_t offset, __global short *p); +@@ -11777,7 +11789,7 @@ void __ovld vstore16(uint16 data, size_t offset, __private uint *p); + void __ovld vstore16(long16 data, size_t offset, __private long *p); + void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p); + void __ovld vstore16(float16 data, size_t offset, __private float *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore2(double2 data, size_t offset, __global double *p); + void __ovld vstore3(double3 data, size_t offset, __global double *p); + void __ovld vstore4(double4 data, size_t offset, __global double *p); +@@ -11793,7 +11805,7 @@ void __ovld vstore3(double3 data, size_t offset, __private double *p); + void __ovld vstore4(double4 data, size_t offset, __private double *p); + void __ovld vstore8(double8 data, size_t offset, __private double *p); + void __ovld vstore16(double16 data, size_t offset, __private double *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + void __ovld vstore(half data, size_t offset, __global half *p); + void __ovld vstore2(half2 data, size_t offset, __global half *p); +@@ -11814,7 +11826,6 @@ void __ovld vstore4(half4 data, size_t offset, __private half *p); + void __ovld vstore8(half8 data, size_t offset, __private half *p); + void __ovld vstore16(half16 data, size_t offset, __private half *p); + #endif //cl_khr_fp16 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * Read sizeof (half) bytes of data from address +@@ -11825,13 +11836,12 @@ void __ovld vstore16(half16 data, size_t offset, __private half *p); + * must be 
16-bit aligned. + */ + float __ovld vload_half(size_t offset, const __constant half *p); +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld vload_half(size_t offset, const half *p); +-#else ++#endif //__opencl_c_generic_address_space + float __ovld vload_half(size_t offset, const __global half *p); + float __ovld vload_half(size_t offset, const __local half *p); + float __ovld vload_half(size_t offset, const __private half *p); +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * Read sizeof (halfn) bytes of data from address +@@ -11846,13 +11856,13 @@ float3 __ovld vload_half3(size_t offset, const __constant half *p); + float4 __ovld vload_half4(size_t offset, const __constant half *p); + float8 __ovld vload_half8(size_t offset, const __constant half *p); + float16 __ovld vload_half16(size_t offset, const __constant half *p); +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float2 __ovld vload_half2(size_t offset, const half *p); + float3 __ovld vload_half3(size_t offset, const half *p); + float4 __ovld vload_half4(size_t offset, const half *p); + float8 __ovld vload_half8(size_t offset, const half *p); + float16 __ovld vload_half16(size_t offset, const half *p); +-#else ++#endif //__opencl_c_generic_address_space + float2 __ovld vload_half2(size_t offset, const __global half *p); + float3 __ovld vload_half3(size_t offset, const __global half *p); + float4 __ovld vload_half4(size_t offset, const __global half *p); +@@ -11868,7 +11878,6 @@ float3 __ovld vload_half3(size_t offset, const __private half *p); + float4 __ovld vload_half4(size_t offset, const __private half *p); + float8 __ovld vload_half8(size_t offset, const __private half *p); + float16 __ovld vload_half16(size_t offset, const __private half *p); +-#endif //defined(__OPENCL_CPP_VERSION__) || 
(__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * The float value given by data is first +@@ -11881,20 +11890,20 @@ float16 __ovld vload_half16(size_t offset, const __private half *p); + * The default current rounding mode is round to + * nearest even. + */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + void __ovld vstore_half(float data, size_t offset, half *p); + void __ovld vstore_half_rte(float data, size_t offset, half *p); + void __ovld vstore_half_rtz(float data, size_t offset, half *p); + void __ovld vstore_half_rtp(float data, size_t offset, half *p); + void __ovld vstore_half_rtn(float data, size_t offset, half *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore_half(double data, size_t offset, half *p); + void __ovld vstore_half_rte(double data, size_t offset, half *p); + void __ovld vstore_half_rtz(double data, size_t offset, half *p); + void __ovld vstore_half_rtp(double data, size_t offset, half *p); + void __ovld vstore_half_rtn(double data, size_t offset, half *p); +-#endif //cl_khr_fp64 +-#else ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif //__opencl_c_generic_address_space + void __ovld vstore_half(float data, size_t offset, __global half *p); + void __ovld vstore_half_rte(float data, size_t offset, __global half *p); + void __ovld vstore_half_rtz(float data, size_t offset, __global half *p); +@@ -11910,7 +11919,7 @@ void __ovld vstore_half_rte(float data, size_t offset, __private half *p); + void __ovld vstore_half_rtz(float data, size_t offset, __private half *p); + void __ovld vstore_half_rtp(float data, size_t offset, __private half *p); + void __ovld vstore_half_rtn(float data, size_t offset, __private half *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore_half(double data, size_t offset, __global half *p); + void __ovld 
vstore_half_rte(double data, size_t offset, __global half *p); + void __ovld vstore_half_rtz(double data, size_t offset, __global half *p); +@@ -11926,8 +11935,7 @@ void __ovld vstore_half_rte(double data, size_t offset, __private half *p); + void __ovld vstore_half_rtz(double data, size_t offset, __private half *p); + void __ovld vstore_half_rtp(double data, size_t offset, __private half *p); + void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); +-#endif //cl_khr_fp64 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + /** + * The floatn value given by data is converted to +@@ -11940,7 +11948,7 @@ void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); + * The default current rounding mode is round to + * nearest even. + */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + void __ovld vstore_half2(float2 data, size_t offset, half *p); + void __ovld vstore_half3(float3 data, size_t offset, half *p); + void __ovld vstore_half4(float4 data, size_t offset, half *p); +@@ -11966,7 +11974,7 @@ void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p); + void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p); + void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p); + void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore_half2(double2 data, size_t offset, half *p); + void __ovld vstore_half3(double3 data, size_t offset, half *p); + void __ovld vstore_half4(double4 data, size_t offset, half *p); +@@ -11992,8 +12000,8 @@ void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p); + void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p); + void __ovld vstore_half8_rtn(double8 data, size_t offset, 
half *p); + void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p); +-#endif //cl_khr_fp64 +-#else ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif //__opencl_c_generic_address_space + void __ovld vstore_half2(float2 data, size_t offset, __global half *p); + void __ovld vstore_half3(float3 data, size_t offset, __global half *p); + void __ovld vstore_half4(float4 data, size_t offset, __global half *p); +@@ -12069,7 +12077,7 @@ void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p); + void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p); + void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p); + void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstore_half2(double2 data, size_t offset, __global half *p); + void __ovld vstore_half3(double3 data, size_t offset, __global half *p); + void __ovld vstore_half4(double4 data, size_t offset, __global half *p); +@@ -12145,8 +12153,7 @@ void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p); + void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p); + void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p); + void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p); +-#endif //cl_khr_fp64 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + /** + * For n = 1, 2, 4, 8 and 16 read sizeof (halfn) +@@ -12167,14 +12174,14 @@ float3 __ovld vloada_half3(size_t offset, const __constant half *p); + float4 __ovld vloada_half4(size_t offset, const __constant half *p); + float8 __ovld vloada_half8(size_t offset, const __constant half *p); + float16 __ovld vloada_half16(size_t offset, const __constant half *p); +-#if 
defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + float __ovld vloada_half(size_t offset, const half *p); + float2 __ovld vloada_half2(size_t offset, const half *p); + float3 __ovld vloada_half3(size_t offset, const half *p); + float4 __ovld vloada_half4(size_t offset, const half *p); + float8 __ovld vloada_half8(size_t offset, const half *p); + float16 __ovld vloada_half16(size_t offset, const half *p); +-#else ++#endif //__opencl_c_generic_address_space + float __ovld vloada_half(size_t offset, const __global half *p); + float2 __ovld vloada_half2(size_t offset, const __global half *p); + float3 __ovld vloada_half3(size_t offset, const __global half *p); +@@ -12193,7 +12200,6 @@ float3 __ovld vloada_half3(size_t offset, const __private half *p); + float4 __ovld vloada_half4(size_t offset, const __private half *p); + float8 __ovld vloada_half8(size_t offset, const __private half *p); + float16 __ovld vloada_half16(size_t offset, const __private half *p); +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** + * The floatn value given by data is converted to +@@ -12211,7 +12217,7 @@ float16 __ovld vloada_half16(size_t offset, const __private half *p); + * mode. The default current rounding mode is + * round to nearest even. 
+ */ +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + void __ovld vstorea_half(float data, size_t offset, half *p); + void __ovld vstorea_half2(float2 data, size_t offset, half *p); + void __ovld vstorea_half3(float3 data, size_t offset, half *p); +@@ -12247,7 +12253,7 @@ void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p); + void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p); + void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstorea_half(double data, size_t offset, half *p); + void __ovld vstorea_half2(double2 data, size_t offset, half *p); + void __ovld vstorea_half3(double3 data, size_t offset, half *p); +@@ -12282,9 +12288,9 @@ void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p); + void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p); + void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p); + void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif //__opencl_c_generic_address_space + +-#else + void __ovld vstorea_half(float data, size_t offset, __global half *p); + void __ovld vstorea_half2(float2 data, size_t offset, __global half *p); + void __ovld vstorea_half3(float3 data, size_t offset, __global half *p); +@@ -12390,7 +12396,7 @@ void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p); + void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p); + void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld vstorea_half(double data, size_t offset, __global half *p); + void __ovld vstorea_half2(double2 data, size_t offset, __global half *p); 
+ void __ovld vstorea_half3(double3 data, size_t offset, __global half *p); +@@ -12495,8 +12501,7 @@ void __ovld vstorea_half3_rtn(double3 data,size_t offset, __private half *p); + void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p); + void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p); + void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p); +-#endif //cl_khr_fp64 +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions + +@@ -12580,7 +12585,7 @@ void __ovld write_mem_fence(cl_mem_fence_flags flags); + + // OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions + +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_generic_address_space + cl_mem_fence_flags __ovld get_fence(const void *ptr); + cl_mem_fence_flags __ovld get_fence(void *ptr); + +@@ -12591,7 +12596,7 @@ cl_mem_fence_flags __ovld get_fence(void *ptr); + * where gentype is builtin type or user defined type. 
+ */ + +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#endif //__opencl_c_generic_address_space + + // OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch + +@@ -12730,7 +12735,7 @@ event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16 + event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event); +@@ -12743,7 +12748,7 @@ event_t __ovld async_work_group_copy(__global double3 *dst, const __local double + event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event); + event_t __ovld async_work_group_copy(__local half2 *dst, const 
__global half2 *src, size_t num_elements, event_t event); +@@ -12893,7 +12898,7 @@ event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local + event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, size_t dst_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event); +@@ -12906,7 +12911,7 @@ event_t __ovld async_work_group_strided_copy(__global double3 *dst, const __loca + event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event); + event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event); + event_t __ovld 
async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event); +@@ -12996,14 +13001,14 @@ void __ovld prefetch(const __global uint16 *p, size_t num_elements); + void __ovld prefetch(const __global long16 *p, size_t num_elements); + void __ovld prefetch(const __global ulong16 *p, size_t num_elements); + void __ovld prefetch(const __global float16 *p, size_t num_elements); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld prefetch(const __global double *p, size_t num_elements); + void __ovld prefetch(const __global double2 *p, size_t num_elements); + void __ovld prefetch(const __global double3 *p, size_t num_elements); + void __ovld prefetch(const __global double4 *p, size_t num_elements); + void __ovld prefetch(const __global double8 *p, size_t num_elements); + void __ovld prefetch(const __global double16 *p, size_t num_elements); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #ifdef cl_khr_fp16 + void __ovld prefetch(const __global half *p, size_t num_elements); + void __ovld prefetch(const __global half2 *p, size_t num_elements); +@@ -13026,9 +13031,11 @@ void __ovld prefetch(const __global half16 *p, size_t num_elements); + * pointed by p. The function returns old. 
+ */ + int __ovld atomic_add(volatile __global int *p, int val); +-unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_add(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_add(volatile __local int *p, int val); +-unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_add(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_add(volatile int *p, int val); + unsigned int __ovld atomic_add(volatile unsigned int *p, unsigned int val); +@@ -13056,9 +13063,11 @@ unsigned long __ovld atom_add(volatile __local unsigned long *p, unsigned long v + * returns old. + */ + int __ovld atomic_sub(volatile __global int *p, int val); +-unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_sub(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_sub(volatile __local int *p, int val); +-unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_sub(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_sub(volatile int *p, int val); + unsigned int __ovld atomic_sub(volatile unsigned int *p, unsigned int val); +@@ -13086,9 +13095,11 @@ unsigned long __ovld atom_sub(volatile __local unsigned long *p, unsigned long v + * value. 
+ */ + int __ovld atomic_xchg(volatile __global int *p, int val); +-unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_xchg(volatile __local int *p, int val); +-unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, ++ unsigned int val); + float __ovld atomic_xchg(volatile __global float *p, float val); + float __ovld atomic_xchg(volatile __local float *p, float val); + #ifdef __OPENCL_CPP_VERSION__ +@@ -13183,12 +13194,15 @@ unsigned long __ovld atom_dec(volatile __local unsigned long *p); + * returns old. + */ + int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val); +-unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val); ++unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, ++ unsigned int cmp, unsigned int val); + int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val); +-unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val); ++unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, ++ unsigned int cmp, unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_cmpxchg(volatile int *p, int cmp, int val); +-unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp, unsigned int val); ++unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp, ++ unsigned int val); + #endif + + #if defined(cl_khr_global_int32_base_atomics) +@@ -13215,9 +13229,11 @@ unsigned long __ovld atom_cmpxchg(volatile __local unsigned long *p, unsigned lo + * returns old. 
+ */ + int __ovld atomic_min(volatile __global int *p, int val); +-unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_min(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_min(volatile __local int *p, int val); +-unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_min(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_min(volatile int *p, int val); + unsigned int __ovld atomic_min(volatile unsigned int *p, unsigned int val); +@@ -13247,9 +13263,11 @@ unsigned long __ovld atom_min(volatile __local unsigned long *p, unsigned long v + * returns old. + */ + int __ovld atomic_max(volatile __global int *p, int val); +-unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_max(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_max(volatile __local int *p, int val); +-unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_max(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_max(volatile int *p, int val); + unsigned int __ovld atomic_max(volatile unsigned int *p, unsigned int val); +@@ -13278,9 +13296,11 @@ unsigned long __ovld atom_max(volatile __local unsigned long *p, unsigned long v + * pointed by p. The function returns old. 
+ */ + int __ovld atomic_and(volatile __global int *p, int val); +-unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_and(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_and(volatile __local int *p, int val); +-unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_and(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_and(volatile int *p, int val); + unsigned int __ovld atomic_and(volatile unsigned int *p, unsigned int val); +@@ -13309,9 +13329,11 @@ unsigned long __ovld atom_and(volatile __local unsigned long *p, unsigned long v + * pointed by p. The function returns old. + */ + int __ovld atomic_or(volatile __global int *p, int val); +-unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_or(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_or(volatile __local int *p, int val); +-unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_or(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_or(volatile int *p, int val); + unsigned int __ovld atomic_or(volatile unsigned int *p, unsigned int val); +@@ -13340,9 +13362,11 @@ unsigned long __ovld atom_or(volatile __local unsigned long *p, unsigned long va + * pointed by p. The function returns old. 
+ */ + int __ovld atomic_xor(volatile __global int *p, int val); +-unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_xor(volatile __global unsigned int *p, ++ unsigned int val); + int __ovld atomic_xor(volatile __local int *p, int val); +-unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val); ++unsigned int __ovld atomic_xor(volatile __local unsigned int *p, ++ unsigned int val); + #ifdef __OPENCL_CPP_VERSION__ + int __ovld atomic_xor(volatile int *p, int val); + unsigned int __ovld atomic_xor(volatile unsigned int *p, unsigned int val); +@@ -13380,108 +13404,78 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v + #endif + + // atomic_init() ++#ifdef __opencl_c_generic_address_space + void __ovld atomic_init(volatile atomic_int *object, int value); + void __ovld atomic_init(volatile atomic_uint *object, uint value); + void __ovld atomic_init(volatile atomic_float *object, float value); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) + void __ovld atomic_init(volatile atomic_long *object, long value); + void __ovld atomic_init(volatile atomic_ulong *object, ulong value); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld atomic_init(volatile atomic_double *object, double value); +-#endif //cl_khr_fp64 +-#endif ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++void __ovld atomic_init(volatile atomic_int __global *object, int value); ++void __ovld atomic_init(volatile atomic_int __local *object, int value); ++void __ovld atomic_init(volatile atomic_uint __global *object, uint value); ++void __ovld atomic_init(volatile atomic_uint __local *object, uint value); ++void __ovld 
atomic_init(volatile atomic_float __global *object, float value); ++void __ovld atomic_init(volatile atomic_float __local *object, float value); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++void __ovld atomic_init(volatile atomic_long __global *object, long value); ++void __ovld atomic_init(volatile atomic_long __local *object, long value); ++void __ovld atomic_init(volatile atomic_ulong __global *object, ulong value); ++void __ovld atomic_init(volatile atomic_ulong __local *object, ulong value); ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++void __ovld atomic_init(volatile atomic_double __global *object, double value); ++void __ovld atomic_init(volatile atomic_double __local *object, double value); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_work_item_fence() +-void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope); ++void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, ++ memory_scope scope); + + // atomic_fetch() +- ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++#ifdef __opencl_c_generic_address_space + int __ovld atomic_fetch_add(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_sub(volatile atomic_int *object,
int operand); +-int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_or(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_and(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, 
memory_order order, memory_scope scope); + uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_min(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + int __ovld atomic_fetch_max(volatile atomic_int *object, int operand); +-int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order); +-int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); + uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand); +-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order); +-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); + + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) + long __ovld atomic_fetch_add(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_add(volatile atomic_ulong 
*object, ulong operand); +-ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_or(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong 
__ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_and(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_min(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + long __ovld atomic_fetch_max(volatile atomic_long *object, long operand); +-long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order); +-long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); + ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand); +-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); + #endif 
//defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) + + // OpenCL v2.0 s6.13.11.7.5: +@@ -13489,196 +13483,2239 @@ ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong oper + // or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. + + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand); +-uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); +-uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand); +-uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); +- +-uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand); +-uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); +-uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand); +-uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); +-uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand); +-uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, 
intptr_t operand, memory_order order); +-uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); +-uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t opermax); +-uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder); +-uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope); +-uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t opermax); +-uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder); +-uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope); +- +-intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand); +-intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); +-intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); +-intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand); +-intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); +-intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); +-intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand); +-intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); +-intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); +-intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t 
opermax); +-intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder); +-intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); +-intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t opermax); +-intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder); +-intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); ++uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand); ++uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand); ++ ++uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, ++ intptr_t opermax); ++uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, ++ intptr_t opermax); ++ ++intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, ++ uintptr_t opermax); ++intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, ++ uintptr_t opermax); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if(__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++int __ovld atomic_fetch_add(volatile atomic_int __global *object, int 
operand); ++uint __ovld atomic_fetch_add(volatile atomic_uint __local *object, ++ uint operand); ++int __ovld atomic_fetch_sub(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_sub(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_sub(volatile atomic_uint __local *object, ++ uint operand); ++uint __ovld atomic_fetch_sub(volatile atomic_uint __global *object, ++ uint operand); ++int __ovld atomic_fetch_or(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_or(volatile atomic_int __local *object, ++ int operand); ++uint __ovld atomic_fetch_or(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_or(volatile atomic_uint __local *object, uint operand); ++int __ovld atomic_fetch_xor(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_xor(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_xor(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_xor(volatile atomic_uint __local *object, ++ uint operand); ++int __ovld atomic_fetch_and(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_and(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_and(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_and(volatile atomic_uint __local *object, ++ uint operand); ++int __ovld atomic_fetch_min(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_min(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_min(volatile atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_min(volatile atomic_uint __local *object, ++ uint operand); ++int __ovld atomic_fetch_max(volatile atomic_int __global *object, int operand); ++int __ovld atomic_fetch_max(volatile atomic_int __local *object, int operand); ++uint __ovld atomic_fetch_max(volatile
atomic_uint __global *object, ++ uint operand); ++uint __ovld atomic_fetch_max(volatile atomic_uint __local *object, ++ uint operand); ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++long __ovld atomic_fetch_add(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_add(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_add(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_add(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_sub(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_sub(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_sub(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_sub(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_or(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_or(volatile atomic_long __local *object, long operand); ++ulong __ovld atomic_fetch_or(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_or(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_xor(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_xor(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_xor(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_xor(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_and(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_and(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_and(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_and(volatile atomic_ulong __local *object, ++ ulong operand); 
++long __ovld atomic_fetch_min(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_min(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_min(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_min(volatile atomic_ulong __local *object, ++ ulong operand); ++long __ovld atomic_fetch_max(volatile atomic_long __global *object, ++ long operand); ++long __ovld atomic_fetch_max(volatile atomic_long __local *object, ++ long operand); ++ulong __ovld atomic_fetch_max(volatile atomic_ulong __global *object, ++ ulong operand); ++ulong __ovld atomic_fetch_max(volatile atomic_ulong __local *object, ++ ulong operand); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++ ++// OpenCL v2.0 s6.13.11.7.5: ++// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument ++// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be ++// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. 
++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t __global *object, ++ ptrdiff_t operand); ++uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t __local *object, ++ ptrdiff_t operand); ++uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t __global *object, ++ ptrdiff_t operand); ++uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t __local *object, ++ ptrdiff_t operand); ++ ++uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t __global *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t __local *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t __global *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t __local *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t __global *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t __local *object, ++ intptr_t operand); ++uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t __global *object, ++ intptr_t opermax); ++uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t __local *object, ++ intptr_t opermax); ++uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t __global *object, ++ intptr_t opermax); ++uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t __local *object, ++ intptr_t opermax); ++ ++intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t __global *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t __local *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t __global *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t __local *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t __global *object, ++ 
uintptr_t operand); ++intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t __local *object, ++ uintptr_t operand); ++intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t __global *object, ++ uintptr_t opermax); ++intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t __local *object, ++ uintptr_t opermax); ++intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t __global *object, ++ uintptr_t opermax); ++intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t __local *object, ++ uintptr_t opermax); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, ++ memory_order order); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int 
operand, ++ memory_order order); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, ++ memory_order order); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, ++ memory_order order, memory_scope scope); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int 
operand, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, ++ memory_order order); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ 
memory_scope scope); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++ ++// OpenCL v2.0 s6.13.11.7.5: ++// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument ++// can be ptrdiff_t. 
or/xor/and/min/max: atomic type argument can be ++// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#ifdef __opencl_c_atomic_scope_device ++uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand, ++ memory_order order); ++uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand, ++ memory_order order); ++uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, ++ memory_order order); ++uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, ++ memory_order order); ++uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, ++ intptr_t opermax, ++ memory_order minder); ++uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, ++ intptr_t opermax, ++ memory_order minder); ++intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, ++ memory_order order); ++intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, ++ memory_order order); ++intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, ++ uintptr_t opermax, ++ memory_order minder); ++intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, ++ uintptr_t opermax, ++ memory_order minder); ++#endif // __opencl_c_atomic_scope_device ++uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ++ ptrdiff_t operand, ++ memory_order order, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_sub_explicit(volatile 
atomic_uintptr_t *object, ++ ptrdiff_t operand, ++ memory_order order, ++ memory_scope scope); ++ ++uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, memory_order order, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, memory_order order, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, ++ intptr_t operand, memory_order order, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, ++ intptr_t opermax, ++ memory_order minder, ++ memory_scope scope); ++uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, ++ intptr_t opermax, ++ memory_order minder, ++ memory_scope scope); ++ ++intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, memory_order order, ++ memory_scope scope); ++intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, memory_order order, ++ memory_scope scope); ++intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, ++ uintptr_t operand, memory_order order, ++ memory_scope scope); ++intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, ++ uintptr_t opermax, ++ memory_order minder, ++ memory_scope scope); ++intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, ++ uintptr_t opermax, ++ memory_order minder, ++ memory_scope scope); + #endif ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_fetch_add_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_add_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint 
__global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order 
order); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_fetch_add_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_add_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_sub_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint 
__local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_or_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_xor_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_and_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_min_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope 
scope); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int __global *object, ++ int operand, memory_order order, ++ memory_scope scope); ++int __ovld atomic_fetch_max_explicit(volatile atomic_int __local *object, ++ int operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __global *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __local *object, ++ uint operand, memory_order order, ++ memory_scope scope); ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_fetch_add_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_add_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long __global *object, ++ long 
operand, memory_order order); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long __local *object, ++ 
long operand, memory_order order); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_fetch_add_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_add_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_sub_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_or_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope 
scope); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_xor_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_and_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_min_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long __global *object, ++ long operand, memory_order order, ++ memory_scope scope); ++long __ovld atomic_fetch_max_explicit(volatile atomic_long __local *object, ++ long operand, memory_order order, ++ memory_scope scope); ++ulong __ovld 
atomic_fetch_max_explicit(volatile atomic_ulong __global *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __local *object, ++ ulong operand, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++ ++// OpenCL v2.0 s6.13.11.7.5: ++// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument ++// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be ++// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. ++ ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#ifdef __opencl_c_atomic_scope_device ++uintptr_t __ovld ++atomic_fetch_add_explicit(volatile atomic_uintptr_t __global *object, ++ ptrdiff_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_add_explicit(volatile atomic_uintptr_t __local *object, ++ ptrdiff_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_sub_explicit(volatile atomic_uintptr_t __global *object, ++ ptrdiff_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_sub_explicit(volatile atomic_uintptr_t __local *object, ++ ptrdiff_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_or_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_or_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_xor_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_xor_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_and_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld 
++atomic_fetch_and_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t operand, memory_order order); ++uintptr_t __ovld ++atomic_fetch_min_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t opermax, memory_order minder); ++uintptr_t __ovld ++atomic_fetch_min_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t opermax, memory_order minder); ++uintptr_t __ovld ++atomic_fetch_max_explicit(volatile atomic_uintptr_t __global *object, ++ intptr_t opermax, memory_order minder); ++uintptr_t __ovld ++atomic_fetch_max_explicit(volatile atomic_uintptr_t __local *object, ++ intptr_t opermax, memory_order minder); ++intptr_t __ovld ++atomic_fetch_or_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_or_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_xor_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_xor_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_and_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_and_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t operand, memory_order order); ++intptr_t __ovld ++atomic_fetch_min_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t opermax, memory_order minder); ++intptr_t __ovld ++atomic_fetch_min_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t opermax, memory_order minder); ++intptr_t __ovld ++atomic_fetch_max_explicit(volatile atomic_intptr_t __global *object, ++ uintptr_t opermax, memory_order minder); ++intptr_t __ovld ++atomic_fetch_max_explicit(volatile atomic_intptr_t __local *object, ++ uintptr_t opermax, memory_order minder); ++#endif // 
__opencl_c_atomic_scope_device ++uintptr_t __ovld atomic_fetch_add_explicit( ++ volatile atomic_uintptr_t __global *object, ptrdiff_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_add_explicit( ++ volatile atomic_uintptr_t __local *object, ptrdiff_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_sub_explicit( ++ volatile atomic_uintptr_t __global *object, ptrdiff_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_sub_explicit( ++ volatile atomic_uintptr_t __local *object, ptrdiff_t operand, ++ memory_order order, memory_scope scope); ++ ++uintptr_t __ovld atomic_fetch_or_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_or_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_xor_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_xor_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_and_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_and_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t operand, ++ memory_order order, memory_scope scope); ++uintptr_t __ovld atomic_fetch_min_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t opermax, ++ memory_order minder, memory_scope scope); ++uintptr_t __ovld atomic_fetch_min_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t opermax, ++ memory_order minder, memory_scope scope); ++uintptr_t __ovld atomic_fetch_max_explicit( ++ volatile atomic_uintptr_t __global *object, intptr_t 
opermax, ++ memory_order minder, memory_scope scope); ++uintptr_t __ovld atomic_fetch_max_explicit( ++ volatile atomic_uintptr_t __local *object, intptr_t opermax, ++ memory_order minder, memory_scope scope); ++ ++intptr_t __ovld atomic_fetch_or_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_or_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_xor_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_xor_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_and_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_and_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t operand, ++ memory_order order, memory_scope scope); ++intptr_t __ovld atomic_fetch_min_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t opermax, ++ memory_order minder, memory_scope scope); ++intptr_t __ovld atomic_fetch_min_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t opermax, ++ memory_order minder, memory_scope scope); ++intptr_t __ovld atomic_fetch_max_explicit( ++ volatile atomic_intptr_t __global *object, uintptr_t opermax, ++ memory_order minder, memory_scope scope); ++intptr_t __ovld atomic_fetch_max_explicit( ++ volatile atomic_intptr_t __local *object, uintptr_t opermax, ++ memory_order minder, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_store() + ++#if defined(__opencl_c_atomic_scope_device) && \ ++ 
defined(__opencl_c_atomic_order_seq_cst) ++ ++#ifdef __opencl_c_generic_address_space + void __ovld atomic_store(volatile atomic_int *object, int desired); +-void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); + void __ovld atomic_store(volatile atomic_uint *object, uint desired); +-void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); + void __ovld atomic_store(volatile atomic_float *object, float desired); +-void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld atomic_store(volatile atomic_double *object, double desired); +-void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + void __ovld atomic_store(volatile atomic_long *object, long desired); +-void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); + void __ovld atomic_store(volatile atomic_ulong *object, ulong desired); +-void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order); +-void __ovld atomic_store_explicit(volatile atomic_ulong 
*object, ulong desired, memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++void __ovld atomic_store(volatile atomic_int __global *object, int desired); ++void __ovld atomic_store(volatile atomic_int __local *object, int desired); ++void __ovld atomic_store(volatile atomic_uint __global *object, uint desired); ++void __ovld atomic_store(volatile atomic_uint __local *object, uint desired); ++void __ovld atomic_store(volatile atomic_float __global *object, float desired); ++void __ovld atomic_store(volatile atomic_float __local *object, float desired); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++void __ovld atomic_store(volatile atomic_double __global *object, ++ double desired); ++void __ovld atomic_store(volatile atomic_double __local *object, ++ double desired); + #endif ++void __ovld atomic_store(volatile atomic_long __global *object, long desired); ++void __ovld atomic_store(volatile atomic_long __local *object, long desired); ++void __ovld atomic_store(volatile atomic_ulong __global *object, ulong desired); ++void __ovld atomic_store(volatile atomic_ulong __local *object, ulong desired); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, ++ memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, ++ memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_float *object, 
float desired, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, ++ memory_order order, memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, ++ memory_order order, memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, ++ memory_order order, memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_double *object, ++ double desired, memory_order order); ++#endif //__opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_double *object, ++ double desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, ++ memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, ++ memory_order order); ++#endif //__opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, ++ memory_order order, memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_int __global *object, ++ int desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_int __local *object, ++ int desired, memory_order order); ++void __ovld 
atomic_store_explicit(volatile atomic_uint __global *object, ++ uint desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_uint __local *object, ++ uint desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_float __global *object, ++ float desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_float __local *object, ++ float desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_int __global *object, ++ int desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_int __local *object, ++ int desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_uint __global *object, ++ uint desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_uint __local *object, ++ uint desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_float __global *object, ++ float desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_float __local *object, ++ float desired, memory_order order, ++ memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_double __global *object, ++ double desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_double __local *object, ++ double desired, memory_order order); ++#endif //__opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_double __global *object, ++ double desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_double __local *object, ++ double desired, memory_order 
order, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_long __global *object, ++ long desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_long __local *object, ++ long desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_ulong __global *object, ++ ulong desired, memory_order order); ++void __ovld atomic_store_explicit(volatile atomic_ulong __local *object, ++ ulong desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++void __ovld atomic_store_explicit(volatile atomic_long __global *object, ++ long desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_long __local *object, ++ long desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_ulong __global *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++void __ovld atomic_store_explicit(volatile atomic_ulong __local *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_load() +- ++#ifdef __opencl_c_generic_address_space ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) + int __ovld atomic_load(volatile atomic_int *object); +-int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order); +-int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope); + uint __ovld atomic_load(volatile atomic_uint *object); +-uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order); +-uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope); + float __ovld atomic_load(volatile atomic_float *object); 
+-float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order); +-float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld atomic_load(volatile atomic_double *object); +-double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order); +-double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + long __ovld atomic_load(volatile atomic_long *object); +-long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order); +-long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope); + ulong __ovld atomic_load(volatile atomic_ulong *object); +-ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order); +-ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope); +-#endif ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif //__opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++int __ovld atomic_load(volatile atomic_int __global *object); ++int __ovld atomic_load(volatile atomic_int __local *object); ++uint __ovld atomic_load(volatile atomic_uint __global *object); ++uint __ovld atomic_load(volatile atomic_uint __local *object); ++float __ovld atomic_load(volatile atomic_float __global *object); ++float __ovld atomic_load(volatile atomic_float __local *object); ++#if defined(cl_khr_int64_base_atomics) && 
defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++double __ovld atomic_load(volatile atomic_double __global *object); ++double __ovld atomic_load(volatile atomic_double __local *object); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++long __ovld atomic_load(volatile atomic_long __global *object); ++long __ovld atomic_load(volatile atomic_long __local *object); ++ulong __ovld atomic_load(volatile atomic_ulong __global *object); ++ulong __ovld atomic_load(volatile atomic_ulong __local *object); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_load_explicit(volatile atomic_int *object, ++ memory_order order); ++uint __ovld atomic_load_explicit(volatile atomic_uint *object, ++ memory_order order); ++float __ovld atomic_load_explicit(volatile atomic_float *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_load_explicit(volatile atomic_uint *object, ++ memory_order order, memory_scope scope); ++float __ovld atomic_load_explicit(volatile atomic_float *object, ++ memory_order order, memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++double __ovld atomic_load_explicit(volatile atomic_double *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++double __ovld atomic_load_explicit(volatile atomic_double *object, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_fp64) || 
defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_load_explicit(volatile atomic_long *object, ++ memory_order order); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, ++ memory_order order); ++#endif //__opencl_c_atomic_scope_device ++long __ovld atomic_load_explicit(volatile atomic_long *object, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_load_explicit(volatile atomic_int __global *object, ++ memory_order order); ++int __ovld atomic_load_explicit(volatile atomic_int __local *object, ++ memory_order order); ++uint __ovld atomic_load_explicit(volatile atomic_uint __global *object, ++ memory_order order); ++uint __ovld atomic_load_explicit(volatile atomic_uint __local *object, ++ memory_order order); ++float __ovld atomic_load_explicit(volatile atomic_float __global *object, ++ memory_order order); ++float __ovld atomic_load_explicit(volatile atomic_float __local *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_load_explicit(volatile atomic_int __global *object, ++ memory_order order, memory_scope scope); ++int __ovld atomic_load_explicit(volatile atomic_int __local *object, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_load_explicit(volatile atomic_uint __global *object, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_load_explicit(volatile atomic_uint __local *object, ++ memory_order order, memory_scope scope); ++float __ovld atomic_load_explicit(volatile atomic_float __global *object, ++ memory_order order, memory_scope scope); ++float __ovld atomic_load_explicit(volatile 
atomic_float __local *object, ++ memory_order order, memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++double __ovld atomic_load_explicit(volatile atomic_double __global *object, ++ memory_order order); ++double __ovld atomic_load_explicit(volatile atomic_double __local *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++double __ovld atomic_load_explicit(volatile atomic_double __global *object, ++ memory_order order, memory_scope scope); ++double __ovld atomic_load_explicit(volatile atomic_double __local *object, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_load_explicit(volatile atomic_long __global *object, ++ memory_order order); ++long __ovld atomic_load_explicit(volatile atomic_long __local *object, ++ memory_order order); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong __global *object, ++ memory_order order); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong __local *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_load_explicit(volatile atomic_long __global *object, ++ memory_order order, memory_scope scope); ++long __ovld atomic_load_explicit(volatile atomic_long __local *object, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong __global *object, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_load_explicit(volatile atomic_ulong __local *object, ++ memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_exchange() +- ++#ifdef __opencl_c_generic_address_space ++#if 
defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) + int __ovld atomic_exchange(volatile atomic_int *object, int desired); +-int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order); +-int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); + uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired); +-uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order); +-uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); + float __ovld atomic_exchange(volatile atomic_float *object, float desired); +-float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order); +-float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld atomic_exchange(volatile atomic_double *object, double desired); +-double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order); +-double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + long __ovld atomic_exchange(volatile atomic_long *object, long desired); +-long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order); +-long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); + ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired); +-ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order); 
+-ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++int __ovld atomic_exchange(volatile atomic_int __global *object, int desired); ++int __ovld atomic_exchange(volatile atomic_int __local *object, int desired); ++uint __ovld atomic_exchange(volatile atomic_uint __global *object, ++ uint desired); ++uint __ovld atomic_exchange(volatile atomic_uint __local *object, uint desired); ++float __ovld atomic_exchange(volatile atomic_float __global *object, ++ float desired); ++float __ovld atomic_exchange(volatile atomic_float __local *object, ++ float desired); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++double __ovld atomic_exchange(volatile atomic_double __global *object, ++ double desired); ++double __ovld atomic_exchange(volatile atomic_double __local *object, ++ double desired); + #endif ++long __ovld atomic_exchange(volatile atomic_long __global *object, ++ long desired); ++long __ovld atomic_exchange(volatile atomic_long __local *object, long desired); ++ulong __ovld atomic_exchange(volatile atomic_ulong __global *object, ++ ulong desired); ++ulong __ovld atomic_exchange(volatile atomic_ulong __local *object, ++ ulong desired); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#ifdef __opencl_c_generic_address_space 
++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, ++ memory_order order); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, ++ memory_order order); ++float __ovld atomic_exchange_explicit(volatile atomic_float *object, ++ float desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, ++ memory_order order, memory_scope scope); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, ++ memory_order order, memory_scope scope); ++float __ovld atomic_exchange_explicit(volatile atomic_float *object, ++ float desired, memory_order order, ++ memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++double __ovld atomic_exchange_explicit(volatile atomic_double *object, ++ double desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++double __ovld atomic_exchange_explicit(volatile atomic_double *object, ++ double desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, ++ memory_order order); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ++ ulong desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, ++ memory_order order, memory_scope scope); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // 
__opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++int __ovld atomic_exchange_explicit(volatile atomic_int __global *object, ++ int desired, memory_order order); ++int __ovld atomic_exchange_explicit(volatile atomic_int __local *object, ++ int desired, memory_order order); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint __global *object, ++ uint desired, memory_order order); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint __local *object, ++ uint desired, memory_order order); ++float __ovld atomic_exchange_explicit(volatile atomic_float __global *object, ++ float desired, memory_order order); ++float __ovld atomic_exchange_explicit(volatile atomic_float __local *object, ++ float desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++int __ovld atomic_exchange_explicit(volatile atomic_int __global *object, ++ int desired, memory_order order, ++ memory_scope scope); ++int __ovld atomic_exchange_explicit(volatile atomic_int __local *object, ++ int desired, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint __global *object, ++ uint desired, memory_order order, ++ memory_scope scope); ++uint __ovld atomic_exchange_explicit(volatile atomic_uint __local *object, ++ uint desired, memory_order order, ++ memory_scope scope); ++float __ovld atomic_exchange_explicit(volatile atomic_float __global *object, ++ float desired, memory_order order, ++ memory_scope scope); ++float __ovld atomic_exchange_explicit(volatile atomic_float __local *object, ++ float desired, memory_order order, ++ memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++double __ovld atomic_exchange_explicit(volatile atomic_double __global *object, ++ double desired, memory_order order); ++double 
__ovld atomic_exchange_explicit(volatile atomic_double __local *object, ++ double desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++double __ovld atomic_exchange_explicit(volatile atomic_double __global *object, ++ double desired, memory_order order, ++ memory_scope scope); ++double __ovld atomic_exchange_explicit(volatile atomic_double __local *object, ++ double desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++long __ovld atomic_exchange_explicit(volatile atomic_long __global *object, ++ long desired, memory_order order); ++long __ovld atomic_exchange_explicit(volatile atomic_long __local *object, ++ long desired, memory_order order); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __global *object, ++ ulong desired, memory_order order); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __local *object, ++ ulong desired, memory_order order); ++#endif // __opencl_c_atomic_scope_device ++long __ovld atomic_exchange_explicit(volatile atomic_long __global *object, ++ long desired, memory_order order, ++ memory_scope scope); ++long __ovld atomic_exchange_explicit(volatile atomic_long __local *object, ++ long desired, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __global *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __local *object, ++ ulong desired, memory_order order, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_compare_exchange_strong() and atomic_compare_exchange_weak() +- +-bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired); +-bool __ovld 
atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, +- int desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, +- int desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected, +- uint desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected, +- uint desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, +- int desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, +- int desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, +- uint desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, +- uint desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected, +- float desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile 
atomic_float *object, float *expected, +- float desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, +- float desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, +- float desired, memory_order success, memory_order failure, memory_scope scope); ++#ifdef __opencl_c_generic_address_space ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, ++ int *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, ++ uint *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, ++ int *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, ++ uint *expected, uint desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, ++ float *expected, float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, ++ float *expected, float desired); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, ++ double *expected, double desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, ++ double *expected, double desired); ++#endif ++bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, ++ long *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, ++ long *expected, long desired); ++bool __ovld 
atomic_compare_exchange_strong(volatile atomic_ulong *object, ++ ulong *expected, ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ++ ulong *expected, ulong desired); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, ++ int __global *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, ++ int __global *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, ++ int __local *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, ++ int __local *expected, int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, ++ int __private *expected, ++ int desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, ++ int __private *expected, ++ int desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_uint __global *object, ++ uint __global *expected, uint desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, ++ uint __global *expected, ++ uint desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_uint __global *object, ++ uint __local *expected, uint desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, ++ uint __local *expected, ++ uint desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_uint __global *object, ++ uint __private *expected, uint desired); ++bool __ovld 
atomic_compare_exchange_strong(volatile atomic_uint __local *object, ++ uint __private *expected, ++ uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, ++ int __global *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, ++ int __global *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, ++ int __local *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, ++ int __local *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, ++ int __private *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, ++ int __private *expected, int desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, ++ uint __global *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, ++ uint __global *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, ++ uint __local *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, ++ uint __local *expected, uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, ++ uint __private *expected, ++ uint desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, ++ uint __private *expected, ++ uint desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __global *object, ++ float __global *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __local *object, ++ float __global *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __global *object, ++ float __local *expected, float desired); ++bool 
__ovld ++atomic_compare_exchange_strong(volatile atomic_float __local *object, ++ float __local *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __global *object, ++ float __private *expected, float desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_float __local *object, ++ float __private *expected, float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, ++ float __global *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, ++ float __global *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, ++ float __local *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, ++ float __local *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, ++ float __private *expected, ++ float desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, ++ float __private *expected, ++ float desired); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +-#ifdef cl_khr_fp64 +-bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, +- double desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, +- double desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, +- double desired, memory_order success, 
memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, +- double desired, memory_order success, memory_order failure, memory_scope scope); +-#endif //cl_khr_fp64 +-bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, +- long desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, +- long desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, +- long desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, +- long desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, +- ulong desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, +- ulong desired, memory_order success, memory_order failure, memory_scope scope); +-bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected, +- ulong desired, memory_order success, memory_order failure); +-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected, +- ulong desired, memory_order success, 
memory_order failure, memory_scope scope); ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __global *object, ++ double __global *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __local *object, ++ double __global *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __global *object, ++ double __local *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __local *object, ++ double __local *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __global *object, ++ double __private *expected, double desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_double __local *object, ++ double __private *expected, double desired); ++bool __ovld ++atomic_compare_exchange_weak(volatile atomic_double __global *object, ++ double __global *expected, double desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, ++ double __global *expected, ++ double desired); ++bool __ovld ++atomic_compare_exchange_weak(volatile atomic_double __global *object, ++ double __local *expected, double desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, ++ double __local *expected, ++ double desired); ++bool __ovld ++atomic_compare_exchange_weak(volatile atomic_double __global *object, ++ double __private *expected, double desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, ++ double __private *expected, ++ double desired); + #endif ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_long __global *object, ++ long __global *expected, long desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, ++ long __global *expected, ++ long desired); ++bool __ovld 
++atomic_compare_exchange_strong(volatile atomic_long __global *object, ++ long __local *expected, long desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, ++ long __local *expected, ++ long desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_long __global *object, ++ long __private *expected, long desired); ++bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, ++ long __private *expected, ++ long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, ++ long __global *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, ++ long __global *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, ++ long __local *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, ++ long __local *expected, long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, ++ long __private *expected, ++ long desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, ++ long __private *expected, ++ long desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __global *object, ++ ulong __global *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __local *object, ++ ulong __global *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __global *object, ++ ulong __local *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __local *object, ++ ulong __local *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __global *object, ++ ulong __private *expected, ulong desired); ++bool __ovld ++atomic_compare_exchange_strong(volatile atomic_ulong __local *object, 
++ ulong __private *expected, ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, ++ ulong __global *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, ++ ulong __global *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, ++ ulong __local *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, ++ ulong __local *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, ++ ulong __private *expected, ++ ulong desired); ++bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, ++ ulong __private *expected, ++ ulong desired); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, ++ int *expected, int desired, ++ memory_order success, ++ memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint *object, uint *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, ++ int *expected, int desired, ++ memory_order success, ++ memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, ++ uint *expected, uint desired, ++ memory_order success, ++ memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float *object, float *expected, float desired, ++ memory_order success, memory_order failure); ++bool 
__ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, ++ float *expected, ++ float desired, ++ memory_order success, ++ memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, ++ int *expected, int desired, ++ memory_order success, ++ memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint *object, uint *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, ++ int *expected, int desired, ++ memory_order success, ++ memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, ++ uint *expected, uint desired, ++ memory_order success, ++ memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float *object, float *expected, float desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float *object, float *expected, float desired, ++ memory_order success, memory_order failure, memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double *object, double *expected, double desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double *object, double *expected, double desired, ++ memory_order success, memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double *object, double 
*expected, double desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double *object, double *expected, double desired, ++ memory_order success, memory_order failure, memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long *object, long *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, ++ long *expected, long desired, ++ memory_order success, ++ memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong *object, ulong *expected, ulong desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ++ ulong *expected, ++ ulong desired, ++ memory_order success, ++ memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long *object, long *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, ++ long *expected, long desired, ++ memory_order success, ++ memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong *object, ulong *expected, ulong desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong *object, ulong *expected, ulong desired, ++ memory_order success, memory_order failure, memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // __opencl_c_generic_address_space ++ 
++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __global *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __global *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __local *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __local *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __private *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __private *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __global *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __global *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, 
uint __private *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __global *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __global *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __local *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __local *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __private *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __private *expected, int desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __global *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __global *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __local 
*expected, uint desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __global *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __global *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __local *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __local *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __private *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __private *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __global *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __global *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile 
atomic_float __global *object, float __local *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __local *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __private *expected, ++ float desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __private *expected, ++ float desired, memory_order success, memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __global *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __global *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __local *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __local *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __global *object, int __private *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_int __local *object, int __private *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint 
__global *object, uint __global *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __global *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __global *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_uint __local *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __global *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __global *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __global *object, int __local *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __local *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ 
volatile atomic_int __global *object, int __private *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_int __local *object, int __private *expected, int desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __global *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __global *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __local *expected, uint desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __global *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_uint __local *object, uint __private *expected, ++ uint desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __global *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __global *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld 
atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __local *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __local *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __global *object, float __private *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_float __local *object, float __private *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __global *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __global *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __local *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __local *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __global *object, float __private *expected, ++ float desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_float __local *object, float __private *expected, ++ float 
desired, memory_order success, memory_order failure, ++ memory_scope scope); ++#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __global *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __global *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __local *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __local *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __private *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __private *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __global *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __global *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __local *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld 
atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __local *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __private *expected, ++ double desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __private *expected, ++ double desired, memory_order success, memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __global *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __global *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __local *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __local *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __global *object, double __private *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_double __local *object, double __private *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __global *expected, ++ 
double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __global *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __local *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __local *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __global *object, double __private *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_double __local *object, double __private *expected, ++ double desired, memory_order success, memory_order failure, ++ memory_scope scope); ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __global *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __global *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __local *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __local *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld 
atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __private *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __private *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __global *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __global *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __local *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __local *expected, long desired, ++ memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __private *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __private *expected, ++ long desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order 
failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __global *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile 
atomic_long __local *object, long __global *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __local *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __local *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __global *object, long __private *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_long __local *object, long __private *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __global *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __global *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __local *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __local *expected, long desired, ++ memory_order success, memory_order failure, memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __global *object, long __private *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld 
atomic_compare_exchange_weak_explicit( ++ volatile atomic_long __local *object, long __private *expected, ++ long desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __global *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_strong_explicit( ++ volatile atomic_ulong __local *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __global *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __local *expected, ++ ulong 
desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __local *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __global *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++bool __ovld atomic_compare_exchange_weak_explicit( ++ volatile atomic_ulong __local *object, ulong __private *expected, ++ ulong desired, memory_order success, memory_order failure, ++ memory_scope scope); ++#endif // defined(cl_khr_int64_base_atomics) && ++ // defined(cl_khr_int64_extended_atomics) ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + // atomic_flag_test_and_set() and atomic_flag_clear() +- ++#if defined(__opencl_c_atomic_scope_device) && \ ++ defined(__opencl_c_atomic_order_seq_cst) ++#ifdef __opencl_c_generic_address_space + bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object); +-bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order); +-bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); + void __ovld atomic_flag_clear(volatile atomic_flag *object); +-void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order); +-void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++bool __ovld atomic_flag_test_and_set(volatile atomic_flag __global *object); ++bool __ovld atomic_flag_test_and_set(volatile atomic_flag __local *object); ++void __ovld atomic_flag_clear(volatile atomic_flag __global *object); ++void __ovld atomic_flag_clear(volatile atomic_flag __local *object); 
++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#endif // defined(__opencl_c_atomic_scope_device) && ++ // defined(__opencl_c_atomic_order_seq_cst) ++ ++#ifdef __opencl_c_generic_address_space ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, ++ memory_order order); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, ++ memory_order order, ++ memory_scope scope); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, ++ memory_order order, memory_scope scope); ++#endif // __opencl_c_generic_address_space ++ ++#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) ++#ifdef __opencl_c_atomic_scope_device ++bool __ovld atomic_flag_test_and_set_explicit( ++ volatile atomic_flag __global *object, memory_order order); ++bool __ovld atomic_flag_test_and_set_explicit( ++ volatile atomic_flag __local *object, memory_order order); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag __global *object, ++ memory_order order); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag __local *object, ++ memory_order order); ++#endif // __opencl_c_atomic_scope_device ++bool __ovld ++atomic_flag_test_and_set_explicit(volatile atomic_flag __global *object, ++ memory_order order, memory_scope scope); ++bool __ovld ++atomic_flag_test_and_set_explicit(volatile atomic_flag __local *object, ++ memory_order order, memory_scope scope); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag __global *object, ++ memory_order order, memory_scope scope); ++void __ovld atomic_flag_clear_explicit(volatile atomic_flag __local *object, ++ memory_order order, memory_scope scope); ++#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) + + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + +@@ -13906,7 
+15943,7 @@ float16 __ovld __cnfn shuffle(float4 x, uint16 mask); + float16 __ovld __cnfn shuffle(float8 x, uint16 mask); + float16 __ovld __cnfn shuffle(float16 x, uint16 mask); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld __cnfn shuffle(double2 x, ulong2 mask); + double2 __ovld __cnfn shuffle(double4 x, ulong2 mask); + double2 __ovld __cnfn shuffle(double8 x, ulong2 mask); +@@ -13926,7 +15963,7 @@ double16 __ovld __cnfn shuffle(double2 x, ulong16 mask); + double16 __ovld __cnfn shuffle(double4 x, ulong16 mask); + double16 __ovld __cnfn shuffle(double8 x, ulong16 mask); + double16 __ovld __cnfn shuffle(double16 x, ulong16 mask); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half2 __ovld __cnfn shuffle(half2 x, ushort2 mask); +@@ -14130,7 +16167,7 @@ float16 __ovld __cnfn shuffle2(float4 x, float4 y, uint16 mask); + float16 __ovld __cnfn shuffle2(float8 x, float8 y, uint16 mask); + float16 __ovld __cnfn shuffle2(float16 x, float16 y, uint16 mask); + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double2 __ovld __cnfn shuffle2(double2 x, double2 y, ulong2 mask); + double2 __ovld __cnfn shuffle2(double4 x, double4 y, ulong2 mask); + double2 __ovld __cnfn shuffle2(double8 x, double8 y, ulong2 mask); +@@ -14150,7 +16187,7 @@ double16 __ovld __cnfn shuffle2(double2 x, double2 y, ulong16 mask); + double16 __ovld __cnfn shuffle2(double4 x, double4 y, ulong16 mask); + double16 __ovld __cnfn shuffle2(double8 x, double8 y, ulong16 mask); + double16 __ovld __cnfn shuffle2(double16 x, double16 y, ulong16 mask); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half2 __ovld __cnfn shuffle2(half2 x, half2 y, ushort2 mask); +@@ -14186,6 +16223,7 @@ int printf(__constant const char* st, ...) 
__attribute__((format(printf, 1, 2))) + #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable + #endif //cl_khr_gl_msaa_sharing + ++#ifdef __opencl_c_images + /** + * Use the coordinate (coord.xy) to do an element lookup in + * the 2D image object specified by image. +@@ -14464,6 +16502,7 @@ half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord); + + // Image read functions for read_write images + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord); + int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord); + uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord); +@@ -14507,6 +16546,7 @@ float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, i + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + #ifdef cl_khr_mipmap_image + float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod); + int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod); +@@ -14557,6 +16597,7 @@ int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, + uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY); + + #endif //cl_khr_mipmap_image ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // Image read functions returning half4 type +@@ -14568,6 +16609,7 @@ half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord); + half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord); + half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int 
coord); + #endif //cl_khr_fp16 ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -14657,7 +16699,7 @@ void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, flo + void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color); + void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color); + void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color); + void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color); +@@ -14690,7 +16732,7 @@ void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, in + void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float depth); + void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float depth); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color); + void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 color); + void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color); +@@ -14703,7 +16745,7 @@ void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 + #ifdef cl_khr_fp16 + void __ovld write_imageh(write_only image1d_t image, int coord, half4 color); + void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 color); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color); + #endif + void __ovld 
write_imageh(write_only image1d_array_t image, int2 coord, half4 color); +@@ -14713,6 +16755,7 @@ void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 col + + // Image write functions for read_write images + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color); + void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color); + void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color); +@@ -14733,7 +16776,7 @@ void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, flo + void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int4 color); + void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, uint4 color); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imagef(read_write image3d_t image, int4 coord, float4 color); + void __ovld write_imagei(read_write image3d_t image, int4 coord, int4 color); + void __ovld write_imageui(read_write image3d_t image, int4 coord, uint4 color); +@@ -14765,7 +16808,7 @@ void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, in + void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, int lod, float color); + void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, int lod, float color); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imagef(read_write image3d_t image, int4 coord, int lod, float4 color); + void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 color); + void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color); +@@ -14778,13 +16821,14 @@ void __ovld write_imageui(read_write 
image3d_t image, int4 coord, int lod, uint4 + #ifdef cl_khr_fp16 + void __ovld write_imageh(read_write image1d_t image, int coord, half4 color); + void __ovld write_imageh(read_write image2d_t image, int2 coord, half4 color); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + void __ovld write_imageh(read_write image3d_t image, int4 coord, half4 color); + #endif + void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 color); + void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color); + void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color); + #endif //cl_khr_fp16 ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have +@@ -14798,7 +16842,7 @@ void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 col + int __ovld __cnfn get_image_width(read_only image1d_t image); + int __ovld __cnfn get_image_width(read_only image1d_buffer_t image); + int __ovld __cnfn get_image_width(read_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_width(read_only image3d_t image); + #endif + int __ovld __cnfn get_image_width(read_only image1d_array_t image); +@@ -14817,7 +16861,7 @@ int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image); + int __ovld __cnfn get_image_width(write_only image1d_t image); + int __ovld __cnfn get_image_width(write_only image1d_buffer_t image); + int __ovld __cnfn get_image_width(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_width(write_only image3d_t image); + #endif + int __ovld 
__cnfn get_image_width(write_only image1d_array_t image); +@@ -14834,6 +16878,7 @@ int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_width(read_write image1d_t image); + int __ovld __cnfn get_image_width(read_write image1d_buffer_t image); + int __ovld __cnfn get_image_width(read_write image2d_t image); +@@ -14850,6 +16895,7 @@ int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image); + int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image); + int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -14870,7 +16916,7 @@ int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + + int __ovld __cnfn get_image_height(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_height(write_only image3d_t image); + #endif + int __ovld __cnfn get_image_height(write_only image2d_array_t image); +@@ -14886,6 +16932,7 @@ int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_height(read_write image2d_t image); + int __ovld __cnfn get_image_height(read_write image3d_t image); + int __ovld __cnfn get_image_height(read_write image2d_array_t image); +@@ -14899,6 +16946,7 @@ int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image); + int __ovld __cnfn 
get_image_height(read_write image2d_array_msaa_t image); + int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -14906,12 +16954,14 @@ int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image); + */ + int __ovld __cnfn get_image_depth(read_only image3d_t image); + +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_depth(write_only image3d_t image); + #endif + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_depth(read_write image3d_t image); ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // OpenCL Extension v2.0 s9.18 - Mipmaps +@@ -14927,13 +16977,15 @@ int __ovld get_image_num_mip_levels(read_only image3d_t image); + + int __ovld get_image_num_mip_levels(write_only image1d_t image); + int __ovld get_image_num_mip_levels(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld get_image_num_mip_levels(write_only image3d_t image); + #endif + ++#ifdef __opencl_c_read_write_images + int __ovld get_image_num_mip_levels(read_write image1d_t image); + int __ovld get_image_num_mip_levels(read_write image2d_t image); + int __ovld get_image_num_mip_levels(read_write image3d_t image); ++#endif //__opencl_c_read_write_images + + int __ovld get_image_num_mip_levels(read_only image1d_array_t image); + int __ovld get_image_num_mip_levels(read_only image2d_array_t image); +@@ -14945,10 +16997,12 @@ int __ovld get_image_num_mip_levels(write_only image2d_array_t image); + int __ovld 
get_image_num_mip_levels(write_only image2d_array_depth_t image); + int __ovld get_image_num_mip_levels(write_only image2d_depth_t image); + ++#ifdef __opencl_c_read_write_images + int __ovld get_image_num_mip_levels(read_write image1d_array_t image); + int __ovld get_image_num_mip_levels(read_write image2d_array_t image); + int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image); + int __ovld get_image_num_mip_levels(read_write image2d_depth_t image); ++#endif //__opencl_c_read_write_images + + #endif //cl_khr_mipmap_image + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +@@ -14992,7 +17046,7 @@ int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth + int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image); + int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image); + int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image); + #endif + int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image); +@@ -15009,6 +17063,7 @@ int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_dept + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image); + int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image); + int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image); +@@ -15025,6 +17080,7 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t im + int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image); + int __ovld __cnfn 
get_image_channel_data_type(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -15064,7 +17120,7 @@ int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t i + int __ovld __cnfn get_image_channel_order(write_only image1d_t image); + int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image); + int __ovld __cnfn get_image_channel_order(write_only image2d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int __ovld __cnfn get_image_channel_order(write_only image3d_t image); + #endif + int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image); +@@ -15081,6 +17137,7 @@ int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld __cnfn get_image_channel_order(read_write image1d_t image); + int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image); + int __ovld __cnfn get_image_channel_order(read_write image2d_t image); +@@ -15097,6 +17154,7 @@ int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image) + int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image); + int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -15131,6 +17189,7 @@ int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= 
CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int2 __ovld __cnfn get_image_dim(read_write image2d_t image); + int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image); + #ifdef cl_khr_depth_images +@@ -15143,6 +17202,7 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image); + int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image); + int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); + #endif //cl_khr_gl_msaa_sharing ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -15152,11 +17212,13 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); + * component and the w component is 0. + */ + int4 __ovld __cnfn get_image_dim(read_only image3d_t image); +-#ifdef cl_khr_3d_image_writes ++#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) + int4 __ovld __cnfn get_image_dim(write_only image3d_t image); + #endif + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int4 __ovld __cnfn get_image_dim(read_write image3d_t image); ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -15184,6 +17246,7 @@ size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t + #endif //cl_khr_gl_msaa_sharing + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array); + size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array); + #ifdef cl_khr_depth_images +@@ -15193,6 +17256,7 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image + size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t 
image_array); + size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array); + #endif //cl_khr_gl_msaa_sharing ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + /** +@@ -15210,16 +17274,21 @@ int __ovld get_image_num_samples(write_only image2d_array_msaa_t image); + int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image); + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_read_write_images + int __ovld get_image_num_samples(read_write image2d_msaa_t image); + int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image); + int __ovld get_image_num_samples(read_write image2d_array_msaa_t image); + int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image); ++#endif //__opencl_c_read_write_images + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + #endif + ++#endif //__opencl_c_images ++ + // OpenCL v2.0 s6.13.15 - Work-group Functions + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_work_group_collective_functions + int __ovld __conv work_group_all(int predicate); + int __ovld __conv work_group_any(int predicate); + +@@ -15243,11 +17312,11 @@ ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z); + float __ovld __conv work_group_broadcast(float a, size_t local_id); + float __ovld __conv work_group_broadcast(float a, size_t x, size_t y); + float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __conv work_group_broadcast(double a, size_t local_id); + double __ovld __conv work_group_broadcast(double a, size_t x, size_t y); + double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z); +-#endif 
//cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #ifdef cl_khr_fp16 + half __ovld __conv work_group_reduce_add(half x); +@@ -15305,7 +17374,7 @@ float __ovld __conv work_group_scan_exclusive_max(float x); + float __ovld __conv work_group_scan_inclusive_add(float x); + float __ovld __conv work_group_scan_inclusive_min(float x); + float __ovld __conv work_group_scan_inclusive_max(float x); +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __conv work_group_reduce_add(double x); + double __ovld __conv work_group_reduce_min(double x); + double __ovld __conv work_group_reduce_max(double x); +@@ -15315,19 +17384,18 @@ double __ovld __conv work_group_scan_exclusive_max(double x); + double __ovld __conv work_group_scan_inclusive_add(double x); + double __ovld __conv work_group_scan_inclusive_min(double x); + double __ovld __conv work_group_scan_inclusive_max(double x); +-#endif //cl_khr_fp64 +- ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) ++#endif //__opencl_c_work_group_collective_functions + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // OpenCL v2.0 s6.13.16 - Pipe Functions +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_pipes + bool __ovld is_valid_reserve_id(reserve_id_t reserve_id); +-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +- ++#endif //__opencl_c_pipes + + // OpenCL v2.0 s6.13.17 - Enqueue Kernels + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +- ++#ifdef __opencl_c_device_enqueue + ndrange_t __ovld ndrange_1D(size_t); + ndrange_t __ovld ndrange_1D(size_t, size_t); + ndrange_t __ovld ndrange_1D(size_t, size_t, size_t); +@@ -15355,11 +17423,13 @@ bool __ovld is_valid_event (clk_event_t event); + void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value); + + queue_t 
__ovld get_default_queue(void); ++#endif //__opencl_c_device_enqueue + #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + // OpenCL Extension v2.0 s9.17 - Sub-groups + +-#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) ++#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \ ++ defined(__opencl_c_subgroups) + // Shared Sub Group Functions + uint __ovld get_sub_group_size(void); + uint __ovld get_max_sub_group_size(void); +@@ -15445,7 +17515,7 @@ half __ovld __conv sub_group_scan_inclusive_min(half x); + half __ovld __conv sub_group_scan_inclusive_max(half x); + #endif //cl_khr_fp16 + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id); + double __ovld __conv sub_group_reduce_add(double x); + double __ovld __conv sub_group_reduce_min(double x); +@@ -15456,7 +17526,7 @@ double __ovld __conv sub_group_scan_exclusive_max(double x); + double __ovld __conv sub_group_scan_inclusive_add(double x); + double __ovld __conv sub_group_scan_inclusive_min(double x); + double __ovld __conv sub_group_scan_inclusive_max(double x); +-#endif //cl_khr_fp64 ++#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) + + #endif //cl_khr_subgroups cl_intel_subgroups + +@@ -16226,16 +18296,22 @@ uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c ); + long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c ); + ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c ); + ++#ifdef __opencl_c_images + uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord ); + uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord ); + uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord ); + uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord ); ++#endif //__opencl_c_images + + #if 
defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord); + uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord); + uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord); + uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord); ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + uint __ovld __conv intel_sub_group_block_read( const __global uint* p ); +@@ -16243,16 +18319,22 @@ uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p ); + uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p ); + uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p ); + ++#ifdef __opencl_c_images + void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data); + void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data); + void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data); + void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data); ++#endif //__opencl_c_images + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data); + void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data); + void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data); + void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 
coord, uint8 data); ++#endif // __opencl_c_read_write_images ++#endif // __opencl_c_images + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data ); +@@ -16267,7 +18349,7 @@ half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c ); + half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c ); + #endif + +-#if defined(cl_khr_fp64) ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + double __ovld __conv intel_sub_group_shuffle( double x, uint c ); + double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c ); + double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c ); +@@ -16366,16 +18448,22 @@ ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x ); + short __ovld __conv intel_sub_group_scan_inclusive_max( short x ); + ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x ); + ++#ifdef __opencl_c_images + uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord ); + uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord ); + uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord ); + uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord ); ++#endif //__opencl_c_images + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord ); + uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord ); + uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord ); + uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 
byte_coord ); ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p ); +@@ -16383,16 +18471,22 @@ uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p + uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p ); + uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p ); + ++#ifdef __opencl_c_images + void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data ); + void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data ); + void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data ); + void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data ); ++#endif //__opencl_c_images + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data ); + void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data ); + void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data ); + void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data ); ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data ); +@@ -16400,16 +18494,22 @@ void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint + void __ovld __conv 
intel_sub_group_block_write_ui4( __global uint* p, uint4 data ); + void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data ); + ++#ifdef __opencl_c_images + ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord ); + ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord ); + ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord ); + ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord ); ++#endif //__opencl_c_images + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord); + ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord); + ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord); + ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord); ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p ); +@@ -16417,16 +18517,22 @@ ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* + ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p ); + ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p ); + ++#ifdef __opencl_c_images + void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data); + void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data); + void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data); + void 
__ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data); ++#endif //__opencl_c_images + + #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#ifdef __opencl_c_images ++#ifdef __opencl_c_read_write_images + void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data); + void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data); + void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data); + void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data); ++#endif //__opencl_c_read_write_images ++#endif //__opencl_c_images + #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) + + void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data ); +@@ -16545,6 +18651,7 @@ short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset( + short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size, + ushort2 image_size); + ++#ifdef __opencl_c_images + intel_sub_group_avc_ime_result_t __ovld + intel_sub_group_avc_ime_evaluate_with_single_reference( + read_only image2d_t src_image, read_only image2d_t ref_image, +@@ -16585,6 +18692,7 @@ intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout( + read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, + intel_sub_group_avc_ime_payload_t payload, + intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components); ++#endif //__opencl_c_images + + intel_sub_group_avc_ime_single_reference_streamin_t __ovld + intel_sub_group_avc_ime_get_single_reference_streamin( +@@ -16649,6 +18757,7 @@ intel_sub_group_avc_ref_payload_t __ovld + intel_sub_group_avc_ref_set_bilinear_filter_enable( + intel_sub_group_avc_ref_payload_t payload); + ++#ifdef __opencl_c_images + intel_sub_group_avc_ref_result_t __ovld + 
intel_sub_group_avc_ref_evaluate_with_single_reference( + read_only image2d_t src_image, read_only image2d_t ref_image, +@@ -16667,6 +18776,7 @@ intel_sub_group_avc_ref_evaluate_with_multi_reference( + read_only image2d_t src_image, uint packed_reference_ids, + uchar packed_reference_field_polarities, sampler_t vme_media_sampler, + intel_sub_group_avc_ref_payload_t payload); ++#endif //__opencl_c_images + + // SIC built-in functions + intel_sub_group_avc_sic_payload_t __ovld +@@ -16717,6 +18827,7 @@ intel_sub_group_avc_sic_set_block_based_raw_skip_sad( + uchar block_based_skip_type, + intel_sub_group_avc_sic_payload_t payload); + ++#ifdef __opencl_c_images + intel_sub_group_avc_sic_result_t __ovld + intel_sub_group_avc_sic_evaluate_ipe( + read_only image2d_t src_image, sampler_t vme_media_sampler, +@@ -16739,6 +18850,7 @@ intel_sub_group_avc_sic_evaluate_with_multi_reference( + read_only image2d_t src_image, uint packed_reference_ids, + uchar packed_reference_field_polarities, sampler_t vme_media_sampler, + intel_sub_group_avc_sic_payload_t payload); ++#endif //__opencl_c_images + + uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape( + intel_sub_group_avc_sic_result_t result); +diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp +index c87d240a8206..8c8f03bba0f3 100644 +--- a/clang/lib/Parse/ParseDecl.cpp ++++ b/clang/lib/Parse/ParseDecl.cpp +@@ -3799,8 +3799,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, + isInvalid = DS.SetTypeAltiVecBool(true, Loc, PrevSpec, DiagID, Policy); + break; + case tok::kw_pipe: +- if (!getLangOpts().OpenCL || (getLangOpts().OpenCLVersion < 200 && +- !getLangOpts().OpenCLCPlusPlus)) { ++ if (!getLangOpts().OpenCLPipeKeyword) { + // OpenCL 2.0 defined this keyword. OpenCL 1.2 and earlier should + // support the "pipe" word as identifier. 
+ Tok.getIdentifierInfo()->revertTokenIDToIdentifier(); +@@ -3922,8 +3921,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, + case tok::kw___generic: + // generic address space is introduced only in OpenCL v2.0 + // see OpenCL C Spec v2.0 s6.5.5 +- if (Actions.getLangOpts().OpenCLVersion < 200 && +- !Actions.getLangOpts().OpenCLCPlusPlus) { ++ if (!Actions.getLangOpts().OpenCLGenericKeyword) { + DiagID = diag::err_opencl_unknown_type_specifier; + PrevSpec = Tok.getIdentifierInfo()->getNameStart(); + isInvalid = true; +@@ -4973,8 +4971,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { + default: return false; + + case tok::kw_pipe: +- return (getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200) || +- getLangOpts().OpenCLCPlusPlus; ++ return getLangOpts().OpenCLPipeKeyword; + + case tok::identifier: // foo::bar + // Unfortunate hack to support "Class.factoryMethod" notation. +@@ -5499,8 +5496,7 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang, + if (Kind == tok::star || Kind == tok::caret) + return true; + +- if (Kind == tok::kw_pipe && +- ((Lang.OpenCL && Lang.OpenCLVersion >= 200) || Lang.OpenCLCPlusPlus)) ++ if (Kind == tok::kw_pipe && Lang.OpenCLPipeKeyword) + return true; + + if (!Lang.CPlusPlus) +diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp +index 6402b31d00b2..65f283ad556b 100644 +--- a/clang/lib/Parse/ParsePragma.cpp ++++ b/clang/lib/Parse/ParsePragma.cpp +@@ -749,12 +749,14 @@ void Parser::HandlePragmaOpenCLExtension() { + if (Name == "all") { + if (State == Disable) { + Opt.disableAll(); +- Opt.enableSupportedCore(getLangOpts()); ++ Opt.enableSupportedCore(); + } else { + PP.Diag(NameLoc, diag::warn_pragma_expected_predicate) << 1; + } ++ } else if (Opt.isFeature(Name)) { ++ PP.Diag(NameLoc, diag::warn_opencl_pragma_feature_ignore) << Ident; + } else if (State == Begin) { +- if (!Opt.isKnown(Name) || !Opt.isSupported(Name, getLangOpts())) { ++ if 
(!Opt.isKnown(Name) || !Opt.isSupported(Name)) { + Opt.support(Name); + } + Actions.setCurrentOpenCLExtension(Name); +@@ -764,9 +766,9 @@ void Parser::HandlePragmaOpenCLExtension() { + Actions.setCurrentOpenCLExtension(""); + } else if (!Opt.isKnown(Name)) + PP.Diag(NameLoc, diag::warn_pragma_unknown_extension) << Ident; +- else if (Opt.isSupportedExtension(Name, getLangOpts())) ++ else if (Opt.isSupportedExtension(Name)) + Opt.enable(Name, State == Enable); +- else if (Opt.isSupportedCore(Name, getLangOpts())) ++ else if (Opt.isSupportedCore(Name)) + PP.Diag(NameLoc, diag::warn_pragma_extension_is_core) << Ident; + else + PP.Diag(NameLoc, diag::warn_pragma_unsupported_extension) << Ident; +diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td +index 745363a6b43f..6a1fce567090 100644 +--- a/clang/lib/Sema/OpenCLBuiltins.td ++++ b/clang/lib/Sema/OpenCLBuiltins.td +@@ -22,11 +22,13 @@ + class Version { + int ID = _Version; + } ++ + def CLAll : Version< 0>; + def CL10 : Version<100>; + def CL11 : Version<110>; + def CL12 : Version<120>; + def CL20 : Version<200>; ++def CL30 : Version<300>; + + // Address spaces + // Pointer types need to be assigned an address space. 
+@@ -72,6 +74,14 @@ def ArmIntegerDotProductAccumulateInt8 : FunctionExtension<"cl_arm_integ + def ArmIntegerDotProductAccumulateInt16 : FunctionExtension<"cl_arm_integer_dot_product_accumulate_int16">; + def ArmIntegerDotProductAccumulateSaturateInt8 : FunctionExtension<"cl_arm_integer_dot_product_accumulate_saturate_int8">; + ++// Features ++def FuncExtGenericAddressSpace : FunctionExtension<"__opencl_c_generic_address_space">; ++def FuncExtWorkGroupCollective : FunctionExtension<"__opencl_c_work_group_collective_functions">; ++def FuncExtPipes : FunctionExtension<"__opencl_c_pipes">; ++def FuncExtDeviceSidEenqueue : FunctionExtension<"__opencl_c_device_enqueue">; ++def FuncNonExplicitAtomicFeatures : FunctionExtension<"__opencl_c_atomic_order_seq_cst __opencl_c_atomic_scope_device">; ++def FuncNonExplicitAtomicFeaturesAndGenericAS : FunctionExtension<"__opencl_c_atomic_order_seq_cst __opencl_c_atomic_scope_device __opencl_c_generic_address_space">; ++ + // Qualified Type. These map to ASTContext::QualType. + class QualType { + // Name of the field or function in a clang::ASTContext +@@ -236,13 +246,9 @@ class Builtin _Signature, list _Attributes = Attr. + bit IsConv = _Attributes[2]; + // OpenCL extensions to which the function belongs. + FunctionExtension Extension = FuncExtNone; +- // Version of OpenCL from which the function is available (e.g.: CL10). +- // MinVersion is inclusive. +- Version MinVersion = CL10; +- // Version of OpenCL from which the function is not supported anymore. +- // MaxVersion is exclusive. ++ // List of OpenCL version in which this function available. + // CLAll makes the function available for all versions. 
+- Version MaxVersion = CLAll; ++ list Versions = [CLAll]; + } + + //===----------------------------------------------------------------------===// +@@ -421,7 +427,7 @@ foreach name = ["get_global_size", "get_global_id", "get_local_size", + def : Builtin; + } + +-let MinVersion = CL20 in { ++let Versions = [CL20, CL30] in { + def : Builtin<"get_enqueued_local_size", [Size, UInt]>; + foreach name = ["get_global_linear_id", "get_local_linear_id"] in { + def : Builtin; +@@ -494,7 +500,7 @@ foreach name = ["fma", "mad"] in { + } + + // --- Version dependent --- +-let MaxVersion = CL20 in { ++let Versions = [CL10, CL11, CL12, CL30] in { + foreach AS = [GlobalAS, LocalAS, PrivateAS] in { + foreach name = ["fract", "modf", "sincos"] in { + def : Builtin]>; +@@ -511,7 +517,9 @@ let MaxVersion = CL20 in { + } + } + } +-let MinVersion = CL20 in { ++ ++let Versions = [CL20, CL30] in { ++ let Extension = FuncExtGenericAddressSpace in { + foreach name = ["fract", "modf", "sincos"] in { + def : Builtin]>; + } +@@ -524,6 +532,7 @@ let MinVersion = CL20 in { + def : Builtin]>; + } + } ++ } + } + + // --- Table 9 --- +@@ -554,7 +563,7 @@ foreach name = ["abs"] in { + foreach name = ["clz", "popcount"] in { + def : Builtin; + } +-let MinVersion = CL20 in { ++let Versions = [CL20, CL30] in { + foreach name = ["ctz"] in { + def : Builtin; + } +@@ -728,7 +737,7 @@ foreach name = ["select"] in { + // --- Table 15 --- + // Variants for OpenCL versions below 2.0, using pointers to the global, local + // and private address spaces. +-let MaxVersion = CL20 in { ++let Versions = [CL10, CL11, CL12, CL30] in { + foreach AS = [GlobalAS, LocalAS, PrivateAS] in { + foreach VSize = [2, 3, 4, 8, 16] in { + foreach name = ["vload" # VSize] in { +@@ -771,7 +780,8 @@ let MaxVersion = CL20 in { + } + // Variants for OpenCL versions above 2.0, using pointers to the generic + // address space. 
+-let MinVersion = CL20 in { ++let Versions = [CL20, CL30] in { ++ let Extension = FuncExtGenericAddressSpace in { + foreach VSize = [2, 3, 4, 8, 16] in { + foreach name = ["vload" # VSize] in { + def : Builtin, Size, PointerType, GenericAS>]>; +@@ -809,6 +819,7 @@ let MinVersion = CL20 in { + } + } + } ++ } + } + // Variants using pointers to the constant address space. + foreach VSize = [2, 3, 4, 8, 16] in { +@@ -829,7 +840,7 @@ foreach VSize = [2, 3, 4, 8, 16] in { + def : Builtin, Size, PointerType, ConstantAS>]>; + } + } +-let MaxVersion = CL20 in { ++let Versions = [CL10, CL11, CL12, CL30] in { + foreach AS = [GlobalAS, LocalAS, PrivateAS] in { + def : Builtin<"vload_half", [Float, Size, PointerType, AS>]>; + def : Builtin<"vloada_half", [Float, Size, PointerType, AS>]>; +@@ -852,7 +863,8 @@ let MaxVersion = CL20 in { + } + } + } +-let MinVersion = CL20 in { ++let Versions = [CL20, CL30] in { ++ let Extension = FuncExtGenericAddressSpace in { + foreach AS = [GenericAS] in { + def : Builtin<"vload_half", [Float, Size, PointerType, AS>]>; + def : Builtin<"vloada_half", [Float, Size, PointerType, AS>]>; +@@ -874,6 +886,7 @@ let MinVersion = CL20 in { + } + } + } ++ } + } + + foreach AS = [ConstantAS] in { +@@ -899,7 +912,9 @@ foreach name = ["async_work_group_strided_copy"] in { + def : Builtin, PointerType, LocalAS>, Size, Size, Event]>; + } + foreach name = ["wait_group_events"] in { ++ let Extension = FuncExtGenericAddressSpace in { + def : Builtin]>; ++ } + } + foreach name = ["prefetch"] in { + def : Builtin, GlobalAS>, Size]>; +@@ -1000,13 +1015,25 @@ foreach AS = [GlobalAS, LocalAS] in { + } + } + } ++ + // OpenCL v2.0 s6.13.11 - Atomic Functions. 
+-let MinVersion = CL20 in { ++ ++let Versions = [CL20, CL30] in { ++ let Extension = FuncExtGenericAddressSpace in { ++ foreach TypePair = [[AtomicInt, Int], [AtomicUInt, UInt], ++ [AtomicLong, Long], [AtomicULong, ULong], ++ [AtomicFloat, Float], [AtomicDouble, Double]] in { ++ def : Builtin<"atomic_init", ++ [Void, PointerType, GenericAS>, TypePair[1]]>; ++ } ++ } ++} ++ ++let Versions = [CL20, CL30] in { ++ let Extension = FuncNonExplicitAtomicFeaturesAndGenericAS in { + foreach TypePair = [[AtomicInt, Int], [AtomicUInt, UInt], + [AtomicLong, Long], [AtomicULong, ULong], + [AtomicFloat, Float], [AtomicDouble, Double]] in { +- def : Builtin<"atomic_init", +- [Void, PointerType, GenericAS>, TypePair[1]]>; + def : Builtin<"atomic_store", + [Void, PointerType, GenericAS>, TypePair[1]]>; + def : Builtin<"atomic_load", +@@ -1038,6 +1065,59 @@ let MinVersion = CL20 in { + [TypePair[1], PointerType, GenericAS>, TypePair[2]]>; + } + } ++ } ++} ++ ++let Versions = [CL30] in { ++ foreach AS = [GlobalAS, LocalAS] in { ++ foreach TypePair = [[AtomicInt, Int], [AtomicUInt, UInt], ++ [AtomicLong, Long], [AtomicULong, ULong], ++ [AtomicFloat, Float], [AtomicDouble, Double]] in { ++ def : Builtin<"atomic_init", ++ [Void, PointerType, AS>, TypePair[1]]>; ++ } ++ } ++} ++ ++let Versions = [CL30] in { ++ let Extension = FuncNonExplicitAtomicFeatures in { ++ foreach AS = [GlobalAS, LocalAS] in { ++ foreach TypePair = [[AtomicInt, Int], [AtomicUInt, UInt], ++ [AtomicLong, Long], [AtomicULong, ULong], ++ [AtomicFloat, Float], [AtomicDouble, Double]] in { ++ def : Builtin<"atomic_store", ++ [Void, PointerType, AS>, TypePair[1]]>; ++ def : Builtin<"atomic_load", ++ [TypePair[1], PointerType, AS>]>; ++ def : Builtin<"atomic_exchange", ++ [TypePair[1], PointerType, AS>, TypePair[1]]>; ++ foreach Variant = ["weak", "strong"] in { ++ def : Builtin<"atomic_compare_exchange_" # Variant, ++ [Bool, PointerType, AS>, ++ PointerType, TypePair[1]]>; ++ } ++ } ++ ++ foreach TypePair = [[AtomicInt, 
Int, Int], [AtomicUInt, UInt, UInt], ++ [AtomicLong, Long, Long], [AtomicULong, ULong, ULong], ++ [AtomicIntPtr, IntPtr, PtrDiff], ++ [AtomicUIntPtr, UIntPtr, PtrDiff]] in { ++ foreach ModOp = ["add", "sub"] in { ++ def : Builtin<"atomic_fetch_" # ModOp, ++ [TypePair[1], PointerType, AS>, TypePair[2]]>; ++ } ++ } ++ foreach TypePair = [[AtomicInt, Int, Int], [AtomicUInt, UInt, UInt], ++ [AtomicLong, Long, Long], [AtomicULong, ULong, ULong], ++ [AtomicIntPtr, IntPtr, IntPtr], ++ [AtomicUIntPtr, UIntPtr, UIntPtr]] in { ++ foreach ModOp = ["or", "xor", "and", "min", "max"] in { ++ def : Builtin<"atomic_fetch_" # ModOp, ++ [TypePair[1], PointerType, AS>, TypePair[2]]>; ++ } ++ } ++ } ++ } + } + + //-------------------------------------------------------------------- +@@ -1217,7 +1297,8 @@ foreach aQual = ["WO", "RW"] in { + //-------------------------------------------------------------------- + // OpenCL v2.0 s6.13.15 - Work-group Functions + // --- Table 26 --- +-let MinVersion = CL20 in { ++let Versions = [CL20, CL30] in { ++ let Extension = FuncExtWorkGroupCollective in { + foreach name = ["work_group_all", "work_group_any"] in { + def : Builtin; + } +@@ -1232,6 +1313,7 @@ let MinVersion = CL20 in { + def : Builtin; + } + } ++ } + } + + +@@ -1241,8 +1323,10 @@ let MinVersion = CL20 in { + // Defined in Builtins.def + + // --- Table 28 --- +-// Builtins taking pipe arguments are defined in Builtins.def +-def : Builtin<"is_valid_reserve_id", [Bool, ReserveId]>; ++let Extension = FuncExtPipes in { ++ // Builtins taking pipe arguments are defined in Builtins.def ++ def : Builtin<"is_valid_reserve_id", [Bool, ReserveId]>; ++} + + // --- Table 29 --- + // Defined in Builtins.def +@@ -1257,21 +1341,22 @@ def : Builtin<"is_valid_reserve_id", [Bool, ReserveId]>; + // Defined in Builtins.def + + // --- Table 33 --- +-def : Builtin<"enqueue_marker", +- [Int, Queue, UInt, PointerType, GenericAS>, PointerType]>; ++let Extension = FuncExtDeviceSidEenqueue in { ++ def : 
Builtin<"enqueue_marker", ++ [Int, Queue, UInt, PointerType, GenericAS>, PointerType]>; + +-// --- Table 34 --- +-def : Builtin<"retain_event", [Void, ClkEvent]>; +-def : Builtin<"release_event", [Void, ClkEvent]>; +-def : Builtin<"create_user_event", [ClkEvent]>; +-def : Builtin<"is_valid_event", [Bool, ClkEvent]>; +-def : Builtin<"set_user_event_status", [Void, ClkEvent, Int]>; +-// TODO: capture_event_profiling_info +- +-// --- Table 35 --- +-def : Builtin<"get_default_queue", [Queue]>; +-// TODO: ndrange functions ++ // --- Table 34 --- ++ def : Builtin<"retain_event", [Void, ClkEvent]>; ++ def : Builtin<"release_event", [Void, ClkEvent]>; ++ def : Builtin<"create_user_event", [ClkEvent]>; ++ def : Builtin<"is_valid_event", [Bool, ClkEvent]>; ++ def : Builtin<"set_user_event_status", [Void, ClkEvent, Int]>; ++ // TODO: capture_event_profiling_info + ++ // --- Table 35 --- ++ def : Builtin<"get_default_queue", [Queue]>; ++ // TODO: ndrange functions ++} + + //-------------------------------------------------------------------- + // End of the builtin functions defined in the OpenCL C specification. +@@ -1456,7 +1541,7 @@ let Extension = FuncExtKhrSubgroups in { + "get_sub_group_local_id"] in { + def : Builtin; + } +- let MinVersion = CL20 in { ++ let Versions = [CL20] in { + foreach name = ["get_enqueued_num_sub_groups"] in { + def : Builtin; + } +diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp +index 2f2b52106f3d..5092a4691b9b 100644 +--- a/clang/lib/Sema/Sema.cpp ++++ b/clang/lib/Sema/Sema.cpp +@@ -291,9 +291,10 @@ void Sema::Initialize() { + // Initialize predefined OpenCL types and supported extensions and (optional) + // core features. 
+ if (getLangOpts().OpenCL) { ++ getOpenCLOptions().setOpenCLVersion(getLangOpts()); + getOpenCLOptions().addSupport( + Context.getTargetInfo().getSupportedOpenCLOpts()); +- getOpenCLOptions().enableSupportedCore(getLangOpts()); ++ getOpenCLOptions().enableSupportedCore(); + addImplicitTypedef("sampler_t", Context.OCLSamplerTy); + addImplicitTypedef("event_t", Context.OCLEventTy); + if (getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) { +@@ -349,12 +350,18 @@ void Sema::Initialize() { + "cl_khr_int64_base_atomics cl_khr_int64_extended_atomics"); + + setOpenCLExtensionForType(AtomicDoubleT, "cl_khr_fp64"); ++ setOpenCLExtensionForType(Context.OCLReserveIDTy, "__opencl_c_pipes"); ++ setOpenCLExtensionForType(Context.OCLClkEventTy, ++ "__opencl_c_device_enqueue"); ++ setOpenCLExtensionForType(Context.OCLQueueTy, ++ "__opencl_c_device_enqueue"); + } + + setOpenCLExtensionForType(Context.DoubleTy, "cl_khr_fp64"); + +-#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \ +- setOpenCLExtensionForType(Context.Id, Ext); ++#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \ ++ setOpenCLExtensionForType(Context.Id, Ext); \ ++ setOpenCLExtensionForType(Context.Id, "__opencl_c_images"); + #include "clang/Basic/OpenCLImageTypes.def" + #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + addImplicitTypedef(#ExtType, Context.Id##Ty); \ +@@ -2423,6 +2430,27 @@ bool Sema::isOpenCLDisabledDecl(Decl *FD) { + return false; + } + ++template ++void Sema::DiagnoseOpenCLRequiresOption(llvm::StringRef OpenCLOptName, ++ DiagLocT DiagLoc, DiagInfoT DiagInfo, ++ unsigned Selector, ++ SourceRange SrcRange) { ++ const auto &LO = getLangOpts(); ++ auto CLVer = LO.OpenCLCPlusPlus ? 
200 : LO.OpenCLVersion; ++ // For versions higher that 3.0 diagnosing feature ++ if (CLVer >= 300) { ++ OpenCLOptName = ++ llvm::StringSwitch(OpenCLOptName) ++ .Case("cl_khr_3d_image_writes", "__opencl_c_3d_image_writes") ++ .Case("cl_khr_subgroups", "__opencl_c_subgroups") ++ .Case("cl_khr_fp64", "__opencl_c_fp64") ++ .Default(OpenCLOptName); ++ } ++ ++ Diag(DiagLoc, diag::err_opencl_requires_extension) ++ << Selector << DiagInfo << OpenCLOptName << SrcRange; ++} ++ + template + bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc, + DiagInfoT DiagInfo, MapT &Map, +@@ -2434,8 +2462,7 @@ bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc, + bool Disabled = false; + for (auto &I : Loc->second) { + if (I != CurrOpenCLExtension && !getOpenCLOptions().isEnabled(I)) { +- Diag(DiagLoc, diag::err_opencl_requires_extension) << Selector << DiagInfo +- << I << SrcRange; ++ DiagnoseOpenCLRequiresOption(I, DiagLoc, DiagInfo, Selector, SrcRange); + Disabled = true; + } + } +@@ -2471,3 +2498,13 @@ bool Sema::checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E) { + return checkOpenCLDisabledTypeOrDecl(&D, E.getBeginLoc(), FnName, + OpenCLDeclExtMap, 1, D.getSourceRange()); + } ++ ++bool Sema::checkOpenCLSubgroupExtForCallExpr(CallExpr *Call) { ++ if (!getOpenCLOptions().isEnabled("cl_khr_subgroups")) { ++ DiagnoseOpenCLRequiresOption("cl_khr_subgroups", Call->getBeginLoc(), ++ Call->getDirectCallee(), 1, ++ Call->getSourceRange()); ++ return true; ++ } ++ return false; ++} +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index 1bf04d9cb4f2..9dd63db0f420 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -835,20 +835,11 @@ static bool checkOpenCLBlockArgs(Sema &S, Expr *BlockArg) { + return IllegalParams; + } + +-static bool checkOpenCLSubgroupExt(Sema &S, CallExpr *Call) { +- if (!S.getOpenCLOptions().isEnabled("cl_khr_subgroups")) { +- S.Diag(Call->getBeginLoc(), 
diag::err_opencl_requires_extension) +- << 1 << Call->getDirectCallee() << "cl_khr_subgroups"; +- return true; +- } +- return false; +-} +- + static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) { + if (checkArgCount(S, TheCall, 2)) + return true; + +- if (checkOpenCLSubgroupExt(S, TheCall)) ++ if (S.checkOpenCLSubgroupExtForCallExpr(TheCall)) + return true; + + // First argument is an ndrange_t type. +@@ -1883,7 +1874,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, + break; + case Builtin::BIsub_group_reserve_read_pipe: + case Builtin::BIsub_group_reserve_write_pipe: +- if (checkOpenCLSubgroupExt(*this, TheCall) || ++ if (checkOpenCLSubgroupExtForCallExpr(TheCall) || + SemaBuiltinReserveRWPipe(*this, TheCall)) + return ExprError(); + break; +@@ -1896,7 +1887,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, + break; + case Builtin::BIsub_group_commit_read_pipe: + case Builtin::BIsub_group_commit_write_pipe: +- if (checkOpenCLSubgroupExt(*this, TheCall) || ++ if (checkOpenCLSubgroupExtForCallExpr(TheCall) || + SemaBuiltinCommitRWPipe(*this, TheCall)) + return ExprError(); + break; +@@ -4149,6 +4140,20 @@ DiagnoseCStringFormatDirectiveInCFAPI(Sema &S, + } + } + ++bool Sema::isSupportedOpenCLOMemoryOrdering(int64_t Ordering) const { ++ assert(llvm::isValidAtomicOrderingCABI(Ordering)); ++ auto OrderingCABI = (llvm::AtomicOrderingCABI)Ordering; ++ switch (OrderingCABI) { ++ case llvm::AtomicOrderingCABI::acquire: ++ case llvm::AtomicOrderingCABI::release: ++ case llvm::AtomicOrderingCABI::acq_rel: ++ return OpenCLFeatures.isEnabled("__opencl_c_atomic_order_acq_rel"); ++ ++ default: ++ return true; ++ } ++} ++ + /// Determine whether the given type has a non-null nullability annotation. 
+ static bool isNonNullType(ASTContext &ctx, QualType type) { + if (auto nullability = type->getNullability(ctx)) +@@ -4920,10 +4925,17 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, + if (SubExprs.size() >= 2 && Form != Init) { + llvm::APSInt Result(32); + if (SubExprs[1]->isIntegerConstantExpr(Result, Context) && +- !isValidOrderingForOp(Result.getSExtValue(), Op)) ++ !isValidOrderingForOp(Result.getSExtValue(), Op)) { + Diag(SubExprs[1]->getBeginLoc(), + diag::warn_atomic_op_has_invalid_memory_order) + << SubExprs[1]->getSourceRange(); ++ } else if (IsOpenCL && ++ !isSupportedOpenCLOMemoryOrdering(Result.getSExtValue())) { ++ Diag(SubExprs[1]->getBeginLoc(), ++ diag::err_opencl_memory_ordering_require_feat) ++ << SubExprs[1]->getSourceRange(); ++ return ExprError(); ++ } + } + + if (auto ScopeModel = AtomicExpr::getScopeModel(Op)) { +diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp +index 5b0417fa8859..2dee60763d57 100644 +--- a/clang/lib/Sema/SemaDecl.cpp ++++ b/clang/lib/Sema/SemaDecl.cpp +@@ -6290,7 +6290,9 @@ void Sema::deduceOpenCLAddressSpace(ValueDecl *Decl) { + if (Type->isSamplerT() || Type->isVoidType()) + return; + LangAS ImplAS = LangAS::opencl_private; +- if ((getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) && ++ if ((getLangOpts().OpenCLCPlusPlus || ++ getOpenCLOptions().isEnabled( ++ "__opencl_c_program_scope_global_variables")) && + Var->hasGlobalStorage()) + ImplAS = LangAS::opencl_global; + // If the original type from a decayed type is an array type and that array +@@ -7849,18 +7851,22 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { + // OpenCL C v2.0 s6.5.1 - Variables defined at program scope and static + // variables inside a function can also be declared in the global + // address space. 
++ // OpenCL C v3.0 s5.5 - For OpenCL C 2.0, or with the ++ // __opencl_c_program_scope_global_variables feature macro, the ++ // address space for a variable at program scope or a static variable ++ // also be __global + // C++ for OpenCL inherits rule from OpenCL C v2.0. + // FIXME: Adding local AS in C++ for OpenCL might make sense. + if (NewVD->isFileVarDecl() || NewVD->isStaticLocal() || + NewVD->hasExternalStorage()) { +- if (!T->isSamplerT() && +- !T->isDependentType() && ++ if (!T->isSamplerT() && !T->isDependentType() && + !(T.getAddressSpace() == LangAS::opencl_constant || + (T.getAddressSpace() == LangAS::opencl_global && +- (getLangOpts().OpenCLVersion == 200 || +- getLangOpts().OpenCLCPlusPlus)))) { ++ (OpenCLFeatures.isEnabled( ++ "__opencl_c_program_scope_global_variables"))))) { + int Scope = NewVD->isStaticLocal() | NewVD->hasExternalStorage() << 1; +- if (getLangOpts().OpenCLVersion == 200 || getLangOpts().OpenCLCPlusPlus) ++ if (OpenCLFeatures.isEnabled( ++ "__opencl_c_program_scope_global_variables")) + Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space) + << Scope << "global or constant"; + else +diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp +index a9a2a19b4797..73b35eae176a 100644 +--- a/clang/lib/Sema/SemaDeclAttr.cpp ++++ b/clang/lib/Sema/SemaDeclAttr.cpp +@@ -6488,7 +6488,7 @@ static void handleInternalLinkageAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + } + + static void handleOpenCLNoSVMAttr(Sema &S, Decl *D, const ParsedAttr &AL) { +- if (S.LangOpts.OpenCLVersion != 200) ++ if (S.LangOpts.OpenCLVersion < 200) + S.Diag(AL.getLoc(), diag::err_attribute_requires_opencl_version) + << AL << "2.0" << 0; + else +@@ -6572,6 +6572,13 @@ static void handleOpenCLAccessAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + << AL << PDecl->getType() << DeclTy->isImageType(); + D->setInvalidDecl(true); + return; ++ } else if ((!S.getLangOpts().OpenCLCPlusPlus && ++ S.getLangOpts().OpenCLVersion >= 200) 
&& ++ !S.getOpenCLOptions().isEnabled( ++ "__opencl_c_read_write_images")) { ++ S.Diag(AL.getLoc(), diag::err_opencl_requires_extension) ++ << 0 << PDecl->getType() << "__opencl_c_read_write_images"; ++ return; + } + } + } +diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp +index 22bf35dbd0cb..b2081bb6b407 100644 +--- a/clang/lib/Sema/SemaDeclCXX.cpp ++++ b/clang/lib/Sema/SemaDeclCXX.cpp +@@ -38,9 +38,9 @@ + #include "clang/Sema/ScopeInfo.h" + #include "clang/Sema/SemaInternal.h" + #include "clang/Sema/Template.h" ++#include "llvm/ADT/STLExtras.h" + #include "llvm/ADT/ScopeExit.h" + #include "llvm/ADT/SmallString.h" +-#include "llvm/ADT/STLExtras.h" + #include "llvm/ADT/StringExtras.h" + #include + #include +@@ -15228,6 +15228,11 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl, + if (auto *PtrTy = ResultType->getAs()) { + ResultType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy); + } ++ if (CanQual ExpectedPtrTy = ++ ExpectedResultType->getAs()) { ++ ExpectedResultType = SemaRef.Context.getCanonicalType( ++ RemoveAddressSpaceFromPtr(SemaRef, ExpectedPtrTy->getTypePtr())); ++ } + } + + // Check that the result type is what we expect. +@@ -15261,6 +15266,11 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl, + FnDecl->getParamDecl(0)->getType()->getAs()) { + FirstParamType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy); + } ++ if (CanQual ExpectedPtrTy = ++ ExpectedFirstParamType->getAs()) { ++ ExpectedFirstParamType = SemaRef.Context.getCanonicalType( ++ RemoveAddressSpaceFromPtr(SemaRef, ExpectedPtrTy->getTypePtr())); ++ } + } + + // Check that the first parameter type is what we expect. +diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp +index bcbecd545398..40eaebbc9495 100644 +--- a/clang/lib/Sema/SemaLookup.cpp ++++ b/clang/lib/Sema/SemaLookup.cpp +@@ -775,19 +775,20 @@ static void InsertOCLBuiltinDeclarationsFromTable(Sema &S, LookupResult &LR, + // as argument. 
Only meaningful for generic types, otherwise equals 1. + unsigned GenTypeMaxCnt; + ++ ASTContext &Context = S.Context; ++ ++ // Ignore this BIF if its version does not match the language options. ++ unsigned OpenCLVersion = Context.getLangOpts().OpenCLVersion; ++ if (Context.getLangOpts().OpenCLCPlusPlus) ++ OpenCLVersion = 200; ++ ++ unsigned short VersionCode = OpenCLOptions::EncodeVersion(OpenCLVersion); ++ + for (unsigned SignatureIndex = 0; SignatureIndex < Len; SignatureIndex++) { + const OpenCLBuiltinStruct &OpenCLBuiltin = + BuiltinTable[FctIndex + SignatureIndex]; +- ASTContext &Context = S.Context; + +- // Ignore this BIF if its version does not match the language options. +- unsigned OpenCLVersion = Context.getLangOpts().OpenCLVersion; +- if (Context.getLangOpts().OpenCLCPlusPlus) +- OpenCLVersion = 200; +- if (OpenCLVersion < OpenCLBuiltin.MinVersion) +- continue; +- if ((OpenCLBuiltin.MaxVersion != 0) && +- (OpenCLVersion >= OpenCLBuiltin.MaxVersion)) ++ if (!(OpenCLBuiltin.AllVersions & VersionCode)) + continue; + + SmallVector RetTypes; +diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp +index cc151a048b98..f380a5656a57 100644 +--- a/clang/lib/Sema/SemaType.cpp ++++ b/clang/lib/Sema/SemaType.cpp +@@ -2042,7 +2042,7 @@ static QualType deduceOpenCLPointeeAddrSpace(Sema &S, QualType PointeeType) { + !PointeeType.hasAddressSpace()) + PointeeType = S.getASTContext().getAddrSpaceQualType( + PointeeType, +- S.getLangOpts().OpenCLCPlusPlus || S.getLangOpts().OpenCLVersion == 200 ++ S.getOpenCLOptions().isEnabled("__opencl_c_generic_address_space") + ? 
LangAS::opencl_generic + : LangAS::opencl_private); + return PointeeType; +@@ -5316,9 +5316,15 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, + } + + case DeclaratorChunk::Pipe: { +- T = S.BuildReadPipeType(T, DeclType.Loc); +- processTypeAttrs(state, T, TAL_DeclSpec, +- D.getMutableDeclSpec().getAttributes()); ++ if (S.getOpenCLOptions().isEnabled("__opencl_c_pipes")) { ++ T = S.BuildReadPipeType(T, DeclType.Loc); ++ processTypeAttrs(state, T, TAL_DeclSpec, ++ D.getMutableDeclSpec().getAttributes()); ++ } else { ++ D.setInvalidType(true); ++ T = Context.IntTy; ++ S.Diag(D.getIdentifierLoc(), diag::err_opencl_pipes_require_feat); ++ } + break; + } + } +diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +index e1f3f6fe1419..b43d9df5935c 100644 +--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl ++++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +@@ -2,7 +2,8 @@ + // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s + // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s + // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s +- ++// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s ++// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -cl-ext=__opencl_c_program_scope_global_variables -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s + typedef int int2 __attribute__((ext_vector_type(2))); + + typedef struct { +@@ -39,7 +40,7 @@ struct LargeStructTwoMember { + int2 y[20]; + }; + +-#if __OPENCL_C_VERSION__ >= 200 ++#ifdef __opencl_c_program_scope_global_variables + struct 
LargeStructOneMember g_s; + #endif + +@@ -98,7 +99,7 @@ void FuncOneLargeMember(struct LargeStructOneMember u) { + // AMDGCN20: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)* + // AMDGCN20: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false) + // AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]]) +-#if __OPENCL_C_VERSION__ >= 200 ++#ifdef __opencl_c_program_scope_global_variables + void test_indirect_arg_globl(void) { + FuncOneLargeMember(g_s); + } +diff --git a/clang/test/CodeGenOpenCL/address-spaces-conversions.cl b/clang/test/CodeGenOpenCL/address-spaces-conversions.cl +index 52feccc540bb..9ecffcca5ee9 100644 +--- a/clang/test/CodeGenOpenCL/address-spaces-conversions.cl ++++ b/clang/test/CodeGenOpenCL/address-spaces-conversions.cl +@@ -1,5 +1,7 @@ + // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -emit-llvm -o - | FileCheck %s + // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s ++// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s + // When -ffake-address-space-map is not used, all addr space mapped to 0 for x86_64. 
+ + // test that we generate address space casts everywhere we need conversions of +diff --git a/clang/test/CodeGenOpenCL/address-spaces-mangling.cl b/clang/test/CodeGenOpenCL/address-spaces-mangling.cl +index 50622f099143..e19ec8451d0d 100644 +--- a/clang/test/CodeGenOpenCL/address-spaces-mangling.cl ++++ b/clang/test/CodeGenOpenCL/address-spaces-mangling.cl +@@ -6,6 +6,7 @@ + // We check that the address spaces are mangled the same in both version of OpenCL + // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s + // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s + + // We can't name this f as private is equivalent to default + // no specifier given address space so we get multiple definition +@@ -47,7 +48,7 @@ void f(constant int *arg) { } + // OCL-20-DAG: @_Z1fPU3AS2i + // OCL-12-DAG: @_Z1fPU3AS2i + +-#if __OPENCL_C_VERSION__ >= 200 ++#if __OPENCL_C_VERSION__ == 200 + __attribute__((overloadable)) + void f(generic int *arg) { } + // ASMANG20: @_Z1fPU3AS4i +diff --git a/clang/test/CodeGenOpenCL/address-spaces.cl b/clang/test/CodeGenOpenCL/address-spaces.cl +index 144466a690b3..b150e1080b53 100644 +--- a/clang/test/CodeGenOpenCL/address-spaces.cl ++++ b/clang/test/CodeGenOpenCL/address-spaces.cl +@@ -1,9 +1,13 @@ + // RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR ++// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR + // RUN: %clang_cc1 %s -O0 -DCL20 -cl-std=CL2.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR + // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s ++// RUN: %clang_cc1 %s 
-O0 -cl-std=CL3.0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s + // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20AMDGCN + // RUN: %clang_cc1 %s -O0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s ++// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s + // RUN: %clang_cc1 %s -O0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s ++// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s + + // SPIR: %struct.S = type { i32, i32, i32* } + // CL20SPIR: %struct.S = type { i32, i32, i32 addrspace(4)* } +diff --git a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl +index 7216cb517495..8d3b30fe8074 100644 +--- a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl ++++ b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s ++// RUN: %clang_cc1 -O0 -cl-std=CL3.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s + // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL20 %s + + // CL12-LABEL: define void @func1(i32 addrspace(5)* %x) +diff --git a/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl b/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl +index a5d438933fa4..8073c7756eb6 100644 +--- a/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl ++++ b/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl +@@ -4,6 +4,17 @@ + // RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s + // RUN: %clang_cc1 -triple amdgcn---amdgizcl 
-cl-std=CL1.2 %s -emit-llvm -o - | FileCheck %s + // RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple r600 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---opencl -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple r600 -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---opencl -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s ++// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s + + #ifdef __AMDGCN__ + #define PTSIZE 8 +@@ -11,7 +22,7 @@ + #define PTSIZE 4 + #endif + +-#ifdef cl_khr_fp64 ++#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) + #pragma OPENCL EXTENSION cl_khr_fp64 : enable + #endif + #ifdef cl_khr_fp16 +@@ -59,8 +70,12 @@ void test() { + check(__alignof__(double) == 8); + #endif + +- check(sizeof(void*) == (__OPENCL_C_VERSION__ >= 200 ? 8 : 4)); +- check(__alignof__(void*) == (__OPENCL_C_VERSION__ >= 200 ? 
8 : 4)); ++ check(sizeof(private void*) == 4); ++ check(__alignof__(private void*) == 4); ++#ifdef __opencl_c_generic_address_space ++ check(sizeof(generic void*) == 8); ++ check(__alignof__(generic void*) == 8); ++#endif + check(sizeof(global_ptr_t) == PTSIZE); + check(__alignof__(global_ptr_t) == PTSIZE); + check(sizeof(constant_ptr_t) == PTSIZE); +diff --git a/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl b/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl +index d1ab6aceac5c..70c5bace023b 100644 +--- a/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl ++++ b/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL1.2 -emit-llvm -o - -O0 | FileCheck %s ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL3.0 -emit-llvm -o - -O0 | FileCheck %s + + #pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable + void test_int8(uchar4 ua, uchar4 ub, char4 sa, char4 sb) { +diff --git a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl +index 76ace5dca21e..5dc43e222f75 100644 +--- a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl ++++ b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl +@@ -1,6 +1,8 @@ + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM ++// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM ++// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM + + kernel void ker() {}; + // CHECK: define{{.*}}@ker() 
#0 +diff --git a/clang/test/CodeGenOpenCL/fpmath.cl b/clang/test/CodeGenOpenCL/fpmath.cl +index 0108d909c94e..b28392739c71 100644 +--- a/clang/test/CodeGenOpenCL/fpmath.cl ++++ b/clang/test/CodeGenOpenCL/fpmath.cl +@@ -2,6 +2,8 @@ + // RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s + // RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s + // RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL1.2 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s ++// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL3.0 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s ++// RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s + + typedef __attribute__(( ext_vector_type(4) )) float float4; + +diff --git a/clang/test/CodeGenOpenCL/generic-address-space-feature.cl b/clang/test/CodeGenOpenCL/generic-address-space-feature.cl +new file mode 100644 +index 000000000000..890860abe4d9 +--- /dev/null ++++ b/clang/test/CodeGenOpenCL/generic-address-space-feature.cl +@@ -0,0 +1,28 @@ ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL12 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL12 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL20 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins 
-emit-llvm -o - | FileCheck %s --check-prefixes=CL20 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL30 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,__opencl_c_generic_address_space -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL30-GENERIC ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL30 ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,__opencl_c_generic_address_space -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL30-GENERIC ++ ++void test(global float* src1, local float *src2, private float *src3, float *src4, float tmp) { ++ // CL20: %{{.+}} = addrspacecast float addrspace(1)* %{{.+}} to float addrspace(4)* ++ // CL12-NOT: addrspacecast ++ // CL30-NOT: addrspacecast ++ // CL30-GENERIC-NOT: addrspacecast ++ tmp = sincos(tmp, src1); ++ // CL20: %{{.+}} = addrspacecast float addrspace(3)* %{{.+}} to float addrspace(4)* ++ // CL12-NOT: addrspacecast ++ // CL30-NOT: addrspacecast ++ // CL30-GENERIC-NOT: addrspacecast ++ tmp = sincos(tmp, src2); ++ ++ // CL12: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float* {{.+}}) ++ // CL20: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float addrspace(4)* {{.+}}) ++ // CL30: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float* {{.+}}) ++ // CL30-GENERIC: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float addrspace(4)* {{.+}}) ++ // CHECK: addrspacecast ++ tmp = sincos(tmp, src4); ++} +diff --git a/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl b/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl +index 
515f13f6e768..5aa31ac6f345 100644 +--- a/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl ++++ b/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s ++// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s + + // CHECK: %opencl.intel_sub_group_avc_mce_payload_t = type opaque + // CHECK: %opencl.intel_sub_group_avc_ime_payload_t = type opaque +diff --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl +index e89237623478..3d6708ac361f 100644 +--- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl ++++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl +@@ -1,5 +1,8 @@ + // RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s + // RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s ++ + // Test that the kernels always use the SPIR calling convention + // to have unambiguous mapping of arguments to feasibly implement + // clSetKernelArg(). 
+diff --git a/clang/test/CodeGenOpenCL/logical-ops.cl b/clang/test/CodeGenOpenCL/logical-ops.cl +index f083a8580ee7..499eab65039b 100644 +--- a/clang/test/CodeGenOpenCL/logical-ops.cl ++++ b/clang/test/CodeGenOpenCL/logical-ops.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s ++// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s + // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=clc++ -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s + + #pragma OPENCL EXTENSION cl_khr_fp64 : enable +diff --git a/clang/test/CodeGenOpenCL/no-half.cl b/clang/test/CodeGenOpenCL/no-half.cl +index aee8f678f01a..46da7fa339e8 100644 +--- a/clang/test/CodeGenOpenCL/no-half.cl ++++ b/clang/test/CodeGenOpenCL/no-half.cl +@@ -1,6 +1,7 @@ + // RUN: %clang_cc1 %s -cl-std=cl2.0 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s + // RUN: %clang_cc1 %s -cl-std=cl1.2 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s + // RUN: %clang_cc1 %s -cl-std=cl1.1 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s ++// RUN: %clang_cc1 %s -cl-std=cl3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s + + #pragma OPENCL EXTENSION cl_khr_fp64:enable + +diff --git a/clang/test/CodeGenOpenCL/pipe_builtin.cl b/clang/test/CodeGenOpenCL/pipe_builtin.cl +index 02b9669b7ab1..0bf35c336199 100644 +--- a/clang/test/CodeGenOpenCL/pipe_builtin.cl ++++ b/clang/test/CodeGenOpenCL/pipe_builtin.cl +@@ -1,4 +1,7 @@ + // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=clc++ -o - %s | FileCheck %s ++// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=cl2.0 -o - %s | FileCheck %s ++// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=cl3.0 -cl-ext=__opencl_c_pipes,__opencl_c_subgroups -o - 
%s | FileCheck %s ++ + // FIXME: Add MS ABI manglings of OpenCL things and remove %itanium_abi_triple + // above to support OpenCL in the MS C++ ABI. + +diff --git a/clang/test/CodeGenOpenCL/pipe_types.cl b/clang/test/CodeGenOpenCL/pipe_types.cl +index ba064c6d7557..b7a523d4f084 100644 +--- a/clang/test/CodeGenOpenCL/pipe_types.cl ++++ b/clang/test/CodeGenOpenCL/pipe_types.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s ++// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -o - %s | FileCheck %s + + // CHECK: %opencl.pipe_ro_t = type opaque + // CHECK: %opencl.pipe_wo_t = type opaque +diff --git a/clang/test/CodeGenOpenCL/printf.cl b/clang/test/CodeGenOpenCL/printf.cl +index fc139d776db6..0133c5595d81 100644 +--- a/clang/test/CodeGenOpenCL/printf.cl ++++ b/clang/test/CodeGenOpenCL/printf.cl +@@ -1,5 +1,7 @@ + // RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-+cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s + // RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s ++// RUN: %clang_cc1 -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s ++// RUN: %clang_cc1 -cl-std=CL3.0 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s + + typedef __attribute__((ext_vector_type(2))) float float2; + typedef __attribute__((ext_vector_type(2))) half half2; +diff --git a/clang/test/CodeGenOpenCL/unroll-hint.cl b/clang/test/CodeGenOpenCL/unroll-hint.cl +index 0f84450a1ae6..9347c935869b 100644 +--- a/clang/test/CodeGenOpenCL/unroll-hint.cl ++++ b/clang/test/CodeGenOpenCL/unroll-hint.cl +@@ -1,5 +1,6 @@ + // RUN: %clang_cc1 -emit-llvm 
-O0 -cl-std=CL2.0 -o - %s | FileCheck %s + // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s | FileCheck %s ++// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -o - %s | FileCheck %s + + /*** for ***/ + void for_count() +diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c +index 18b31320ea80..a6e7be887c8c 100644 +--- a/clang/test/Driver/autocomplete.c ++++ b/clang/test/Driver/autocomplete.c +@@ -43,6 +43,8 @@ + // CLSTDALL-NEXT: CL1.2 + // CLSTDALL-NEXT: cl2.0 + // CLSTDALL-NEXT: CL2.0 ++// CLSTDALL-NEXT: cl3.0 ++// CLSTDALL-NEXT: CL3.0 + // CLSTDALL-NEXT: clc++ + // CLSTDALL-NEXT: CLC++ + // RUN: %clang --autocomplete=-fno-sanitize-coverage=,f | FileCheck %s -check-prefix=FNOSANICOVER +diff --git a/clang/test/Driver/opencl.cl b/clang/test/Driver/opencl.cl +index 05588f2c8b81..cc0a9143ab37 100644 +--- a/clang/test/Driver/opencl.cl ++++ b/clang/test/Driver/opencl.cl +@@ -2,6 +2,7 @@ + // RUN: %clang -S -### -cl-std=CL1.1 %s 2>&1 | FileCheck --check-prefix=CHECK-CL11 %s + // RUN: %clang -S -### -cl-std=CL1.2 %s 2>&1 | FileCheck --check-prefix=CHECK-CL12 %s + // RUN: %clang -S -### -cl-std=CL2.0 %s 2>&1 | FileCheck --check-prefix=CHECK-CL20 %s ++// RUN: %clang -S -### -cl-std=CL3.0 %s 2>&1 | FileCheck --check-prefix=CHECK-CL30 %s + // RUN: %clang -S -### -cl-std=clc++ %s 2>&1 | FileCheck --check-prefix=CHECK-CLCPP %s + // RUN: %clang -S -### -cl-opt-disable %s 2>&1 | FileCheck --check-prefix=CHECK-OPT-DISABLE %s + // RUN: %clang -S -### -cl-strict-aliasing %s 2>&1 | FileCheck --check-prefix=CHECK-STRICT-ALIASING %s +@@ -22,6 +23,7 @@ + // CHECK-CL11: "-cc1" {{.*}} "-cl-std=CL1.1" + // CHECK-CL12: "-cc1" {{.*}} "-cl-std=CL1.2" + // CHECK-CL20: "-cc1" {{.*}} "-cl-std=CL2.0" ++// CHECK-CL30: "-cc1" {{.*}} "-cl-std=CL3.0" + // CHECK-CLCPP: "-cc1" {{.*}} "-cl-std=clc++" + // CHECK-OPT-DISABLE: "-cc1" {{.*}} "-cl-opt-disable" + // CHECK-STRICT-ALIASING: "-cc1" {{.*}} "-cl-strict-aliasing" +diff --git a/clang/test/Driver/unknown-std.cl 
b/clang/test/Driver/unknown-std.cl +index 6f371bac13ac..00209fb62556 100644 +--- a/clang/test/Driver/unknown-std.cl ++++ b/clang/test/Driver/unknown-std.cl +@@ -10,6 +10,7 @@ + // CHECK-NEXT: note: use 'cl1.1' for 'OpenCL 1.1' standard + // CHECK-NEXT: note: use 'cl1.2' for 'OpenCL 1.2' standard + // CHECK-NEXT: note: use 'cl2.0' for 'OpenCL 2.0' standard ++// CHECK-NEXT: note: use 'cl3.0' for 'OpenCL 3.0' standard + // CHECK-NEXT: note: use 'clc++' for 'C++ for OpenCL' standard + + // Make sure that no other output is present. +diff --git a/clang/test/Frontend/stdlang.c b/clang/test/Frontend/stdlang.c +index 51484999e37a..eac4632fbdd6 100644 +--- a/clang/test/Frontend/stdlang.c ++++ b/clang/test/Frontend/stdlang.c +@@ -9,6 +9,7 @@ + // RUN: %clang_cc1 -x cl -cl-std=CL1.1 -DOPENCL %s + // RUN: %clang_cc1 -x cl -cl-std=CL1.2 -DOPENCL %s + // RUN: %clang_cc1 -x cl -cl-std=CL2.0 -DOPENCL %s ++// RUN: %clang_cc1 -x cl -cl-std=CL3.0 -DOPENCL %s + // RUN: %clang_cc1 -x cl -cl-std=CLC++ -DOPENCL %s + // RUN: not %clang_cc1 -x cl -std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s + // RUN: not %clang_cc1 -x cl -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s +diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl +index 1b151ffdd16a..2716076acdcf 100644 +--- a/clang/test/Headers/opencl-c-header.cl ++++ b/clang/test/Headers/opencl-c-header.cl +@@ -1,6 +1,7 @@ + // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify | FileCheck %s + // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.1 | FileCheck %s + // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.2 | FileCheck %s ++// RUN: %clang_cc1 -O0 -triple spir-unknown-unknown 
-internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL3.0 | FileCheck %s + // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=clc++ | FileCheck %s --check-prefix=CHECK20 + + // Test including the default header as a module. +@@ -39,9 +40,11 @@ + // RUN: rm -rf %t + // RUN: mkdir -p %t + // RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL1.2 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s ++// RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL3.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s + // RUN: %clang_cc1 -triple amdgcn--amdhsa -O0 -emit-llvm -o - -cl-std=CL2.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK20 --check-prefix=CHECK-MOD %s + // RUN: chmod u-w %t + // RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL1.2 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s ++// RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL3.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s + // RUN: %clang_cc1 -triple amdgcn--amdhsa -O0 -emit-llvm -o - -cl-std=CL2.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK20 --check-prefix=CHECK-MOD %s + // RUN: chmod u+w %t + +@@ -67,7 +70,7 @@ char f(char x) { + // from OpenCL 2.0 
onwards. + + // CHECK20: _Z12write_imagef14ocl_image3d_wo +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) + void test_image3dwo(write_only image3d_t img) { + write_imagef(img, (0), (0.0f)); + } +@@ -81,7 +84,7 @@ void test_atomics(__generic volatile unsigned int* a) { + #endif + + // Verify that ATOMIC_VAR_INIT is defined. +-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) + global atomic_int z = ATOMIC_VAR_INIT(99); + #endif //__OPENCL_C_VERSION__ + +diff --git a/clang/test/Index/pipe-size.cl b/clang/test/Index/pipe-size.cl +index 94a1255f0a48..59b76051eda1 100644 +--- a/clang/test/Index/pipe-size.cl ++++ b/clang/test/Index/pipe-size.cl +@@ -2,6 +2,13 @@ + // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR + // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64 + // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN ++// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple x86_64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=X86 ++// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR ++// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64 ++// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN ++ ++ ++ + __kernel void testPipe( pipe int test ) + { + int s = sizeof(test); +diff --git 
a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c +index df2a6128989b..cbb0995b48b0 100644 +--- a/clang/test/Preprocessor/init-aarch64.c ++++ b/clang/test/Preprocessor/init-aarch64.c +@@ -109,12 +109,12 @@ + // AARCH64-NEXT: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1 + // AARCH64_CXX-NEXT: #define __GLIBCXX_BITSIZE_INT_N_0 128 + // AARCH64_CXX-NEXT: #define __GLIBCXX_TYPE_INT_N_0 __int128 +-// AARCH64-NEXT: #define __INT16_C_SUFFIX__ ++// AARCH64-NEXT: #define __INT16_C_SUFFIX__ + // AARCH64-NEXT: #define __INT16_FMTd__ "hd" + // AARCH64-NEXT: #define __INT16_FMTi__ "hi" + // AARCH64-NEXT: #define __INT16_MAX__ 32767 + // AARCH64-NEXT: #define __INT16_TYPE__ short +-// AARCH64-NEXT: #define __INT32_C_SUFFIX__ ++// AARCH64-NEXT: #define __INT32_C_SUFFIX__ + // AARCH64-NEXT: #define __INT32_FMTd__ "d" + // AARCH64-NEXT: #define __INT32_FMTi__ "i" + // AARCH64-NEXT: #define __INT32_MAX__ 2147483647 +@@ -124,7 +124,7 @@ + // AARCH64-NEXT: #define __INT64_FMTi__ "li" + // AARCH64-NEXT: #define __INT64_MAX__ 9223372036854775807L + // AARCH64-NEXT: #define __INT64_TYPE__ long int +-// AARCH64-NEXT: #define __INT8_C_SUFFIX__ ++// AARCH64-NEXT: #define __INT8_C_SUFFIX__ + // AARCH64-NEXT: #define __INT8_FMTd__ "hhd" + // AARCH64-NEXT: #define __INT8_FMTi__ "hhi" + // AARCH64-NEXT: #define __INT8_MAX__ 127 +@@ -238,7 +238,7 @@ + // AARCH64-NEXT: #define __STDC_UTF_32__ 1 + // AARCH64_C: #define __STDC_VERSION__ 201710L + // AARCH64-NEXT: #define __STDC__ 1 +-// AARCH64-NEXT: #define __UINT16_C_SUFFIX__ ++// AARCH64-NEXT: #define __UINT16_C_SUFFIX__ + // AARCH64-NEXT: #define __UINT16_FMTX__ "hX" + // AARCH64-NEXT: #define __UINT16_FMTo__ "ho" + // AARCH64-NEXT: #define __UINT16_FMTu__ "hu" +@@ -259,7 +259,7 @@ + // AARCH64-NEXT: #define __UINT64_FMTx__ "lx" + // AARCH64-NEXT: #define __UINT64_MAX__ 18446744073709551615UL + // AARCH64-NEXT: #define __UINT64_TYPE__ long unsigned int +-// AARCH64-NEXT: #define __UINT8_C_SUFFIX__ ++// 
AARCH64-NEXT: #define __UINT8_C_SUFFIX__ + // AARCH64-NEXT: #define __UINT8_FMTX__ "hhX" + // AARCH64-NEXT: #define __UINT8_FMTo__ "hho" + // AARCH64-NEXT: #define __UINT8_FMTu__ "hhu" +@@ -329,7 +329,7 @@ + // AARCH64-NEXT: #define __UINT_LEAST8_FMTx__ "hhx" + // AARCH64-NEXT: #define __UINT_LEAST8_MAX__ 255 + // AARCH64-NEXT: #define __UINT_LEAST8_TYPE__ unsigned char +-// AARCH64-NEXT: #define __USER_LABEL_PREFIX__ ++// AARCH64-NEXT: #define __USER_LABEL_PREFIX__ + // AARCH64-NEXT: #define __VERSION__ "{{.*}}" + // AARCH64-NEXT: #define __WCHAR_MAX__ 4294967295U + // AARCH64-NEXT: #define __WCHAR_TYPE__ unsigned int +diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c +index 083f0e539d88..6c80517ec4d4 100644 +--- a/clang/test/Preprocessor/predefined-macros.c ++++ b/clang/test/Preprocessor/predefined-macros.c +@@ -129,6 +129,8 @@ + // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL12 + // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=CL2.0 \ + // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL20 ++// RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=CL3.0 \ ++// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL30 + // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-fast-relaxed-math \ + // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FRM + // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=clc++ \ +@@ -137,26 +139,37 @@ + // CHECK-CL10: #define CL_VERSION_1_1 110 + // CHECK-CL10: #define CL_VERSION_1_2 120 + // CHECK-CL10: #define CL_VERSION_2_0 200 ++// CHECK-CL10: #define CL_VERSION_3_0 300 + // CHECK-CL10: #define __OPENCL_C_VERSION__ 100 + // CHECK-CL10-NOT: #define __FAST_RELAXED_MATH__ 1 + // CHECK-CL11: #define CL_VERSION_1_0 100 + // CHECK-CL11: #define CL_VERSION_1_1 110 + // CHECK-CL11: #define CL_VERSION_1_2 120 + // CHECK-CL11: #define CL_VERSION_2_0 200 ++// CHECK-CL11: #define CL_VERSION_3_0 300 + // CHECK-CL11: #define __OPENCL_C_VERSION__ 110 + // 
CHECK-CL11-NOT: #define __FAST_RELAXED_MATH__ 1 + // CHECK-CL12: #define CL_VERSION_1_0 100 + // CHECK-CL12: #define CL_VERSION_1_1 110 + // CHECK-CL12: #define CL_VERSION_1_2 120 + // CHECK-CL12: #define CL_VERSION_2_0 200 ++// CHECK-CL12: #define CL_VERSION_3_0 300 + // CHECK-CL12: #define __OPENCL_C_VERSION__ 120 + // CHECK-CL12-NOT: #define __FAST_RELAXED_MATH__ 1 + // CHECK-CL20: #define CL_VERSION_1_0 100 + // CHECK-CL20: #define CL_VERSION_1_1 110 + // CHECK-CL20: #define CL_VERSION_1_2 120 + // CHECK-CL20: #define CL_VERSION_2_0 200 ++// CHECK-CL20: #define CL_VERSION_3_0 300 + // CHECK-CL20: #define __OPENCL_C_VERSION__ 200 + // CHECK-CL20-NOT: #define __FAST_RELAXED_MATH__ 1 ++// CHECK-CL30: #define CL_VERSION_1_0 100 ++// CHECK-CL30: #define CL_VERSION_1_1 110 ++// CHECK-CL30: #define CL_VERSION_1_2 120 ++// CHECK-CL30: #define CL_VERSION_2_0 200 ++// CHECK-CL30: #define CL_VERSION_3_0 300 ++// CHECK-CL30: #define __OPENCL_C_VERSION__ 300 ++// CHECK-CL30-NOT: #define __FAST_RELAXED_MATH__ 1 + // CHECK-FRM: #define __FAST_RELAXED_MATH__ 1 + // CHECK-CLCPP10: #define __CL_CPP_VERSION_1_0__ 100 + // CHECK-CLCPP10: #define __OPENCL_CPP_VERSION__ 100 +diff --git a/clang/test/Sema/feature-extensions-simult-support.cl b/clang/test/Sema/feature-extensions-simult-support.cl +new file mode 100644 +index 000000000000..0789105002b2 +--- /dev/null ++++ b/clang/test/Sema/feature-extensions-simult-support.cl +@@ -0,0 +1,75 @@ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 
-cl-ext=+cl_khr_subgroups ++ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_subgroups ++ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_device_enqueue ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_read_write_images ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64,-cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_3d_image_writes,-cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_subgroups,-cl_khr_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_3d_image_writes ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_subgroups ++ ++// expected-no-diagnostics ++ ++#ifdef cl_khr_fp64 ++ #ifndef __opencl_c_fp64 ++ #error macros were not properly set up ++ #endif 
++#endif ++#ifdef __opencl_c_fp64 ++ #ifndef cl_khr_fp64 ++ #error macros were not properly set up ++ #endif ++#endif ++ ++#ifdef cl_khr_3d_image_writes ++ #ifndef __opencl_c_3d_image_writes ++ #error macros were not properly set up ++ #endif ++#endif ++#ifdef __opencl_c_3d_image_writes ++ #ifndef cl_khr_3d_image_writes ++ #error macros were not properly set up ++ #endif ++#endif ++ ++#ifdef cl_khr_subgroups ++ #ifndef __opencl_c_subgroups ++ #error macros were not properly set up ++ #endif ++#endif ++#ifdef __opencl_c_subgroups ++ #ifndef cl_khr_subgroups ++ #error macros were not properly set up ++ #endif ++#endif ++ ++#if defined(__opencl_c_pipes) || defined(__opencl_c_device_enqueue) ++ #ifndef __opencl_c_generic_address_space ++ #error macros were not properly set up ++ #endif ++#endif ++ ++#if defined(__opencl_c_3d_image_writes) || defined(__opencl_c_read_write_images) ++ #ifndef __opencl_c_images ++ #error macros were not properly set up ++ #endif ++#endif ++ ++kernel void test(){} +diff --git a/clang/test/Sema/features-ignore-pragma.cl b/clang/test/Sema/features-ignore-pragma.cl +new file mode 100644 +index 000000000000..046ce5390754 +--- /dev/null ++++ b/clang/test/Sema/features-ignore-pragma.cl +@@ -0,0 +1,24 @@ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_3d_image_writes ++ ++#pragma OPENCL EXTENSION __opencl_c_fp64 : enable ++// expected-warning@-1 {{OpenCL feature support can't be controlled via pragma, ignoring}} ++ ++#pragma OPENCL EXTENSION cl_khr_fp64 : enable ++#ifndef __opencl_c_fp64 ++// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_fp64' - ignoring}} ++#endif ++ ++#pragma OPENCL EXTENSION cl_khr_subgroups : enable ++#ifndef 
__opencl_c_subgroups ++// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_subgroups' - ignoring}} ++#endif ++ ++#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable ++#ifndef __opencl_c_3d_image_writes ++// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_3d_image_writes' - ignoring}} ++#endif ++ ++kernel void foo() {} +diff --git a/clang/test/Sema/opencl-features-pipes.cl b/clang/test/Sema/opencl-features-pipes.cl +new file mode 100644 +index 000000000000..c0ac778f24a6 +--- /dev/null ++++ b/clang/test/Sema/opencl-features-pipes.cl +@@ -0,0 +1,18 @@ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.1 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -DHAS ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -DHAS ++// expected-no-diagnostics ++ ++#ifdef HAS ++ #ifndef __opencl_c_pipes ++ #error Feature should be defined ++ #endif ++#else ++ #ifdef __opencl_c_pipes ++ #error Feature should not be defined ++ #endif ++#endif ++ ++kernel void foo() {} +diff --git a/clang/test/Sema/opencl-features.cl b/clang/test/Sema/opencl-features.cl +new file mode 100644 +index 000000000000..aa432f6b60bf +--- /dev/null ++++ b/clang/test/Sema/opencl-features.cl +@@ -0,0 +1,128 @@ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-__opencl_c_device_enqueue,-__opencl_c_pipes,-__opencl_c_read_write_images ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CLC++ ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 ++// expected-no-diagnostics ++ ++#ifndef __opencl_c_int64 ++ #error Feature __opencl_c_int64 shouldn't be defined ++#endif ++ ++#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) ++ 
#ifndef __opencl_c_3d_image_writes ++ #error Feature __opencl_c_3d_image_writes should be defined ++ #endif ++ ++ #ifndef __opencl_c_atomic_order_acq_rel ++ #error Feature __opencl_c_atomic_order_acq_rel should be defined ++ #endif ++ ++ #ifndef __opencl_c_atomic_order_seq_cst ++ #error Feature __opencl_c_atomic_order_seq_cst should be defined ++ #endif ++ ++ #ifndef __opencl_c_atomic_scope_device ++ #error Feature __opencl_c_atomic_scope_device should be defined ++ #endif ++ ++ #ifndef __opencl_c_atomic_scope_all_devices ++ #error Feature __opencl_c_atomic_scope_all_devices should be defined ++ #endif ++ ++ #ifndef __opencl_c_device_enqueue ++ #error Feature __opencl_c_device_enqueue should be defined ++ #endif ++ ++ #ifndef __opencl_c_generic_address_space ++ #error Feature __opencl_c_generic_address_space should be defined ++ #endif ++ ++ #ifndef __opencl_c_pipes ++ #error Feature __opencl_c_pipes should be defined ++ #endif ++ ++ #ifndef __opencl_c_program_scope_global_variables ++ #error Feature __opencl_c_program_scope_global_variables should be defined ++ #endif ++ ++ #ifndef __opencl_c_read_write_images ++ #error Feature __opencl_c_read_write_images should be defined ++ #endif ++ ++ #ifndef __opencl_c_subgroups ++ #error Feature __opencl_c_subgroups should be defined ++ #endif ++ ++ #ifndef __opencl_c_work_group_collective_functions ++ #error Feature __opencl_c_work_group_collective_functions should be defined ++ #endif ++ ++ #ifndef __opencl_c_fp64 ++ #error Feature __opencl_c_fp64 should be defined ++ #endif ++ ++ #ifndef __opencl_c_images ++ #error Feature __opencl_c_images should be defined ++ #endif ++#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ++ ++ ++#if __OPENCL_C_VERSION__ == CL_VERSION_3_0 ++ #ifdef __opencl_c_3d_image_writes ++ #error Feature __opencl_c_3d_image_writes shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_atomic_order_acq_rel ++ #error Feature __opencl_c_atomic_order_acq_rel shouldn't be 
defined ++ #endif ++ ++ #ifdef __opencl_c_atomic_order_seq_cst ++ #error Feature __opencl_c_atomic_order_seq_cst shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_atomic_scope_device ++ #error Feature __opencl_c_atomic_scope_device shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_atomic_scope_all_devices ++ #error Feature __opencl_c_atomic_scope_all_devices shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_device_enqueue ++ #error Feature __opencl_c_device_enqueue shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_generic_address_space ++ #error Feature __opencl_c_generic_address_space shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_pipes ++ #error Feature __opencl_c_pipes shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_program_scope_global_variables ++ #error Feature __opencl_c_program_scope_global_variables shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_read_write_images ++ #error Feature __opencl_c_read_write_images shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_subgroups ++ #error Feature __opencl_c_subgroups shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_work_group_collective_functions ++ #error Feature __opencl_c_work_group_collective_functions shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_fp64 ++ #error Feature __opencl_c_fp64 shouldn't be defined ++ #endif ++ ++ #ifdef __opencl_c_images ++ #error Feature __opencl_c_images shouldn't be defined ++ #endif ++#endif // __OPENCL_C_VERSION__ == CL_VERSION_3_0 ++ ++kernel void foo() {} +diff --git a/clang/test/Sema/pipe_builtins_feature.cl b/clang/test/Sema/pipe_builtins_feature.cl +new file mode 100644 +index 000000000000..56fa94fc7705 +--- /dev/null ++++ b/clang/test/Sema/pipe_builtins_feature.cl +@@ -0,0 +1,21 @@ ++// RUN: %clang_cc1 -cl-std=CL2.0 -fsyntax-only -verify %s ++// RUN: %clang_cc1 -cl-std=CL3.0 -fsyntax-only -verify %s ++// RUN: %clang_cc1 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -fsyntax-only -verify %s ++ ++#ifdef __opencl_c_pipes ++ 
#ifndef __opencl_c_generic_address_space ++ #error Generic address space feature must also be defined ++ #endif ++// CHECK: expected-no-diagnostics ++// check that pragma disable all doesn't touch feature support ++ #pragma OPENCL EXTENSION all : disable ++#endif ++ ++void test(read_only pipe int p, global int *ptr) { ++ reserve_id_t rid; ++} ++ ++#ifndef __opencl_c_pipes ++// expected-error@-5 {{expected parameter declarator}} expected-error@-5 {{expected ')'}} expected-note@-5 {{to match this '('}} ++// expected-error@-5 {{use of type 'reserve_id_t' requires __opencl_c_pipes extension to be enabled}} ++#endif +diff --git a/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl b/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl +index a5a838241347..dbdc5cffd073 100644 +--- a/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl ++++ b/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl +@@ -1,6 +1,9 @@ + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL2.0 + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL2.0 + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL2.0 ++// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space ++// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space ++// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=clc++ + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=clc++ + // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC 
-cl-std=clc++ +diff --git a/clang/test/SemaOpenCL/address-spaces.cl b/clang/test/SemaOpenCL/address-spaces.cl +index e9825fd1682a..563f1e649a82 100644 +--- a/clang/test/SemaOpenCL/address-spaces.cl ++++ b/clang/test/SemaOpenCL/address-spaces.cl +@@ -1,5 +1,6 @@ + // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only + // RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -pedantic -fsyntax-only ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -verify -pedantic -fsyntax-only + // RUN: %clang_cc1 %s -cl-std=clc++ -verify -pedantic -fsyntax-only + + __constant int ci = 1; +diff --git a/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl b/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl +index f63e2913c749..727141190a0b 100644 +--- a/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl ++++ b/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl +@@ -2,6 +2,12 @@ + // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir-unknown-unknown" -verify -pedantic -fsyntax-only -DB32 -DQUALS="const volatile" + // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS= + // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS="const volatile" ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -DB32 -DQUALS= ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -DB32 -DQUALS="const volatile" ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir64-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS= ++// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir64-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic 
-fsyntax-only -Wconversion -DWCONV -DQUALS="const volatile" ++ ++ + + typedef struct {int a;} ndrange_t; + // Diagnostic tests for different overloads of enqueue_kernel from Table 6.13.17.1 of OpenCL 2.0 Spec. +@@ -235,11 +241,17 @@ kernel void bar(global unsigned int *buf) + kernel void foo1(global unsigned int *buf) + { + ndrange_t n; +- buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} ++ buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); ++#if __OPENCL_C_VERSION__ < 300 ++// expected-error@-2 {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} ++#endif + } + + kernel void bar1(global unsigned int *buf) + { + ndrange_t n; +- buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} ++ buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); ++#if __OPENCL_C_VERSION__ < 300 ++// expected-error@-2 {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} ++#endif + } +diff --git a/clang/test/SemaOpenCL/forget-unsupported-builtins.cl b/clang/test/SemaOpenCL/forget-unsupported-builtins.cl +new file mode 100644 +index 000000000000..14dd03e2c7db +--- /dev/null ++++ b/clang/test/SemaOpenCL/forget-unsupported-builtins.cl +@@ -0,0 +1,22 @@ ++// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -verify %s -triple spir-unknown-unknown ++// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -cl-ext=__opencl_c_pipes,__opencl_c_generic_address_space,__opencl_c_device_enqueue -verify %s -triple spir-unknown-unknown -DFEATURES ++ ++#ifndef FEATURES ++ // expected-no-diagnostics ++#else ++ // expected-error@+10 {{cannot redeclare builtin function 'get_pipe_max_packets'}} ++ // 
expected-note@+9 {{'get_pipe_max_packets' is a builtin with type 'unsigned int ()'}} ++ // expected-error@+9 {{cannot redeclare builtin function 'to_local'}} ++ // expected-note@+8 {{'to_local' is a builtin with type 'void *(void *)'}} ++ // expected-error@+8 {{cannot redeclare builtin function 'to_global'}} ++ // expected-note@+7 {{'to_global' is a builtin with type 'void *(void *)'}} ++ // expected-error@+7 {{cannot redeclare builtin function 'get_kernel_work_group_size'}} ++ // expected-note@+6 {{'get_kernel_work_group_size' is a builtin with type 'unsigned int ()'}} ++#endif ++ ++int get_pipe_max_packets(int); ++int to_local(int); ++int to_global(int); ++int get_kernel_work_group_size(int); ++ ++kernel void test(global int *dst) {} +diff --git a/clang/test/SemaOpenCL/image-features.cl b/clang/test/SemaOpenCL/image-features.cl +new file mode 100644 +index 000000000000..ace6913bb31e +--- /dev/null ++++ b/clang/test/SemaOpenCL/image-features.cl +@@ -0,0 +1,20 @@ ++// RUN: %clang_cc1 -cl-std=cl2.0 -fsyntax-only -verify %s -triple spir-unknown-unknown ++// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_images -fsyntax-only -verify %s -triple spir-unknown-unknown ++// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_3d_image_writes -fsyntax-only -verify %s -triple spir-unknown-unknown ++// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_read_write_images -fsyntax-only -verify %s -triple spir-unknown-unknown ++ ++#if defined(__opencl_c_read_write_images) && defined(__opencl_c_3d_image_writes) ++ // expected-no-diagnostics ++#endif ++ ++__kernel void write_3d_image(__write_only image3d_t i) {} ++ ++#ifndef __opencl_c_3d_image_writes ++ // expected-error@-3 {{use of type '__write_only image3d_t' requires __opencl_c_3d_image_writes extension to be enabled}} ++#endif ++ ++__kernel void read_write_3d_image(__read_write image3d_t i) { } ++ ++#ifndef __opencl_c_read_write_images ++ // expected-error@-3 {{use of type '__read_write image3d_t' requires 
__opencl_c_read_write_images extension to be enabled}} ++#endif +diff --git a/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl b/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl +index 36e76621d24a..38b0a04726e3 100644 +--- a/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl ++++ b/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_subgroups ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_subgroups,__opencl_c_pipes + + #pragma OPENCL EXTENSION cl_khr_subgroups : enable + +diff --git a/clang/test/SemaOpenCL/storageclass-cl20.cl b/clang/test/SemaOpenCL/storageclass-cl20.cl +index 581701d2a6a5..469c526ebc30 100644 +--- a/clang/test/SemaOpenCL/storageclass-cl20.cl ++++ b/clang/test/SemaOpenCL/storageclass-cl20.cl +@@ -1,4 +1,5 @@ + // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 ++// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_program_scope_global_variables,__opencl_c_generic_address_space + + int G2 = 0; + global int G3 = 0; +diff --git a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp +index 7c63cf51ecfa..785ff93aaf85 100644 +--- a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp ++++ b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp +@@ -56,6 +56,7 @@ + //===----------------------------------------------------------------------===// + + #include "TableGenBackends.h" ++#include "clang/Basic/OpenCLOptions.h" + #include "llvm/ADT/MapVector.h" + #include "llvm/ADT/STLExtras.h" + #include "llvm/ADT/SmallString.h" +@@ -69,6 +70,7 @@ + #include "llvm/TableGen/Record.h" + #include "llvm/TableGen/StringMatcher.h" + #include "llvm/TableGen/TableGenBackend.h" ++#include + #include + + using namespace llvm; +@@ -228,6 +230,10 @@ private: + // The function "tan", having the same signatures, would be mapped to 
the + // same entry (). + MapVector SignatureListMap; ++ ++ // Encode all versions ++ unsigned short ++ EncodeBuiltinVersions(std::vector BuiltinVersionsRecords) const; + }; + } // namespace + +@@ -338,12 +344,10 @@ struct OpenCLBuiltinStruct { + const bool IsConst : 1; + // Function attribute __attribute__((convergent)) + const bool IsConv : 1; ++ // All opencl versions encoded ++ const unsigned char AllVersions : 5; + // OpenCL extension(s) required for this overload. + const unsigned short Extension; +- // First OpenCL version in which this overload was introduced (e.g. CL20). +- const unsigned short MinVersion; +- // First OpenCL version in which this overload was removed (e.g. CL20). +- const unsigned short MaxVersion; + }; + + )"; +@@ -510,11 +514,9 @@ void BuiltinNameEmitter::EmitBuiltinTable() { + << (Overload.first->getValueAsBit("IsPure")) << ", " + << (Overload.first->getValueAsBit("IsConst")) << ", " + << (Overload.first->getValueAsBit("IsConv")) << ", " +- << FunctionExtensionIndex[ExtName] << ", " +- << Overload.first->getValueAsDef("MinVersion")->getValueAsInt("ID") +- << ", " +- << Overload.first->getValueAsDef("MaxVersion")->getValueAsInt("ID") +- << " },\n"; ++ << EncodeBuiltinVersions( ++ Overload.first->getValueAsListOfDefs("Versions")) ++ << ", " << FunctionExtensionIndex[ExtName] << " },\n"; + Index++; + } + } +@@ -535,10 +537,8 @@ bool BuiltinNameEmitter::CanReuseSignature( + if (Rec->getValueAsBit("IsPure") == Rec2->getValueAsBit("IsPure") && + Rec->getValueAsBit("IsConst") == Rec2->getValueAsBit("IsConst") && + Rec->getValueAsBit("IsConv") == Rec2->getValueAsBit("IsConv") && +- Rec->getValueAsDef("MinVersion")->getValueAsInt("ID") == +- Rec2->getValueAsDef("MinVersion")->getValueAsInt("ID") && +- Rec->getValueAsDef("MaxVersion")->getValueAsInt("ID") == +- Rec2->getValueAsDef("MaxVersion")->getValueAsInt("ID") && ++ EncodeBuiltinVersions(Rec->getValueAsListOfDefs("Versions")) == ++ 
EncodeBuiltinVersions(Rec2->getValueAsListOfDefs("Versions")) && + Rec->getValueAsDef("Extension")->getName() == + Rec2->getValueAsDef("Extension")->getName()) { + return true; +@@ -814,6 +814,15 @@ static void OCL2Qual(ASTContext &Context, const OpenCLTypeStruct &Ty, + OS << "\n} // OCL2Qual\n"; + } + ++unsigned short BuiltinNameEmitter::EncodeBuiltinVersions( ++ std::vector BuiltinVersionsRecords) const { ++ return std::accumulate( ++ BuiltinVersionsRecords.begin(), BuiltinVersionsRecords.end(), ++ (unsigned short)0, [](unsigned short C, Record *R) { ++ return C |= clang::OpenCLOptions::EncodeVersion(R->getValueAsInt("ID")); ++ }); ++} ++ + void clang::EmitClangOpenCLBuiltins(RecordKeeper &Records, raw_ostream &OS) { + BuiltinNameEmitter NameChecker(Records, OS); + NameChecker.Emit(); +-- +2.17.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-Remove-repo-name-in-LLVM-IR.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-Remove-repo-name-in-LLVM-IR.patch new file mode 100644 index 00000000..09089432 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-Remove-repo-name-in-LLVM-IR.patch @@ -0,0 +1,50 @@ +From ff0a6da84b94c16c4519c649f1f7bed3cdf89bbb Mon Sep 17 00:00:00 2001 +From: Feng Zou +Date: Tue, 20 Oct 2020 11:29:04 +0800 +Subject: [PATCH] Remove repo name in LLVM IR + +Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-110/patches/llvm/0002-Remove-repo-name-in-LLVM-IR.patch] +Signed-off-by: Feng Zou +Signed-off-by: Naveen Saini + +--- + llvm/cmake/modules/VersionFromVCS.cmake | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake +index 18edbeabe3e..2d965263478 100644 +--- a/llvm/cmake/modules/VersionFromVCS.cmake ++++ b/llvm/cmake/modules/VersionFromVCS.cmake +@@ -33,17 +33,18 @@ 
function(get_source_info path revision repository) + else() + set(remote "origin") + endif() +- execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote} +- WORKING_DIRECTORY ${path} +- RESULT_VARIABLE git_result +- OUTPUT_VARIABLE git_output +- ERROR_QUIET) +- if(git_result EQUAL 0) +- string(STRIP "${git_output}" git_output) +- set(${repository} ${git_output} PARENT_SCOPE) +- else() +- set(${repository} ${path} PARENT_SCOPE) +- endif() ++ # Do not show repo name in IR ++ # execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote} ++ # WORKING_DIRECTORY ${path} ++ # RESULT_VARIABLE git_result ++ # OUTPUT_VARIABLE git_output ++ # ERROR_QUIET) ++ # if(git_result EQUAL 0) ++ # string(STRIP "${git_output}" git_output) ++ # set(${repository} ${git_output} PARENT_SCOPE) ++ # else() ++ # set(${repository} ${path} PARENT_SCOPE) ++ # endif() + endif() + else() + message(WARNING "Git not found. Version cannot be determined.") +-- +2.18.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend index 17e991ab..4e2bc535 100644 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend @@ -2,8 +2,8 @@ FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:" SPIRV_BRANCH = "${@bb.utils.contains('LLVMVERSION', '10.0.1', 'llvm_release_100', 'llvm_release_110', d)}" -SPIRV10_SRCREV = "4d43f68a30a510b4e7607351caab3df8e7426a6b" -SPIRV11_SRCREV = "93032d36d2fe17befb7994714c07c67ea68efbea" +SPIRV10_SRCREV = "576abae62cecd171992017a4a786e3831221ab8d" +SPIRV11_SRCREV = "2a8c1e6c9778deaa720a23e08c293006dc5d56fd" SPIRV_SRCREV = "${@bb.utils.contains('LLVMVERSION', '10.0.1', '${SPIRV10_SRCREV}', '${SPIRV11_SRCREV}', d)}" @@ -12,10 +12,18 @@ SRC_URI_LLVM10 = " \ file://fix-shared-libs.patch;patchdir=llvm/projects/llvm-spirv \ 
file://BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch;patchdir=llvm \ file://IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch;patchdir=llvm \ + file://llvm10-OpenCL-3.0-support.patch \ + file://0002-Add-cl_khr_extended_subgroup-extensions.patch \ + file://0001-Memory-leak-fix-for-Managed-Static-Mutex.patch \ + file://llvm10-Remove-repo-name-in-LLVM-IR.patch \ + file://0001-Fix-debug-info-of-work-item-builtin-translation-745.patch;patchdir=llvm/projects/llvm-spirv \ " SRC_URI_LLVM11 = " \ file://llvm11-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \ + file://llvm11-OpenCL-3.0-support.patch \ + file://0001-Memory-leak-fix-for-Managed-Static-Mutex.patch \ + file://llvm11-Remove-repo-name-in-LLVM-IR.patch \ " SPIRV_LLVM_SRC_URI = "git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=${SPIRV_BRANCH};destsuffix=git/llvm/projects/llvm-spirv;name=spirv" -- cgit v1.2.3-54-g00ecf