From a685de6fc45afcdbe4a7120e9d5b33e175dd71cd Mon Sep 17 00:00:00 2001 From: haonanya Date: Fri, 13 Aug 2021 10:00:02 +0800 Subject: [PATCH 7/7] support cl_ext_float_atomics Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch] Signed-off-by: haonanya Signed-off-by: Naveen Saini --- clang/lib/Headers/opencl-c-base.h | 25 ++++ clang/lib/Headers/opencl-c.h | 208 ++++++++++++++++++++++++++ clang/test/Headers/opencl-c-header.cl | 96 ++++++++++++ 3 files changed, 329 insertions(+) diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h index 2cc688ccc3da..86bbee12fdf8 100644 --- a/clang/lib/Headers/opencl-c-base.h +++ b/clang/lib/Headers/opencl-c-base.h @@ -14,6 +14,31 @@ #define CL_VERSION_3_0 300 #endif +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) +// For SPIR all extensions are supported. +#if defined(__SPIR__) +#define cl_ext_float_atomics 1 +#ifdef cl_khr_fp16 +#define __opencl_c_ext_fp16_global_atomic_load_store 1 +#define __opencl_c_ext_fp16_local_atomic_load_store 1 +#define __opencl_c_ext_fp16_global_atomic_add 1 +#define __opencl_c_ext_fp16_local_atomic_add 1 +#define __opencl_c_ext_fp16_global_atomic_min_max 1 +#define __opencl_c_ext_fp16_local_atomic_min_max 1 +#endif +#ifdef __opencl_c_fp64 +#define __opencl_c_ext_fp64_global_atomic_add 1 +#define __opencl_c_ext_fp64_local_atomic_add 1 +#define __opencl_c_ext_fp64_global_atomic_min_max 1 +#define __opencl_c_ext_fp64_local_atomic_min_max 1 +#endif +#define __opencl_c_ext_fp32_global_atomic_add 1 +#define __opencl_c_ext_fp32_local_atomic_add 1 +#define __opencl_c_ext_fp32_global_atomic_min_max 1 +#define __opencl_c_ext_fp32_local_atomic_min_max 1 +#endif // defined(__SPIR__) +#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) + // Define features for 2.0 for header backward compatibility #ifndef __opencl_c_int64 #define __opencl_c_int64 1 diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h index 67d900eb1c3d..b463e702d95e 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -14354,6 +14354,214 @@ intptr_t __ovld atomic_fetch_max_explicit( // defined(cl_khr_int64_extended_atomics) #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) +#if defined(cl_ext_float_atomics) + +#if defined(__opencl_c_ext_fp32_global_atomic_min_max) +float __ovld atomic_fetch_min(volatile __global atomic_float *object, + float operand); +float __ovld atomic_fetch_max(volatile __global atomic_float *object, + float operand); +float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object, + float operand, memory_order order, + memory_scope scope); +float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object, + float operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) + +#if defined(__opencl_c_ext_fp32_local_atomic_min_max) +float __ovld atomic_fetch_min(volatile __local atomic_float *object, + float operand); +float __ovld atomic_fetch_max(volatile __local atomic_float *object, + float operand); +float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object, + float operand, memory_order order, + memory_scope scope); +float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object, + float operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max) + +#if defined(__opencl_c_ext_fp32_global_atomic_min_max) || \ + defined(__opencl_c_ext_fp32_local_atomic_min_max) +float __ovld atomic_fetch_min(volatile atomic_float *object, float operand); +float __ovld atomic_fetch_max(volatile atomic_float *object, float operand); +float __ovld atomic_fetch_min_explicit(volatile atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_min_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); +float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) || \ + defined(__opencl_c_ext_fp32_local_atomic_min_max) + +#if defined(__opencl_c_ext_fp64_global_atomic_min_max) +double __ovld atomic_fetch_min(volatile __global atomic_double *object, + double operand); +double __ovld atomic_fetch_max(volatile __global atomic_double *object, + double operand); +double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object, + double operand, memory_order order, + memory_scope scope); +double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object, + double operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) + +#if defined(__opencl_c_ext_fp64_local_atomic_min_max) +double __ovld atomic_fetch_min(volatile __local atomic_double *object, + double operand); +double __ovld atomic_fetch_max(volatile __local atomic_double *object, + double operand); +double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object, + double operand, memory_order order, + memory_scope scope); +double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object, + double operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max) + +#if defined(__opencl_c_ext_fp64_global_atomic_min_max) || \ + defined(__opencl_c_ext_fp64_local_atomic_min_max) +double __ovld atomic_fetch_min(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_max(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_min_explicit(volatile atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_min_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); +double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) || \ + defined(__opencl_c_ext_fp64_local_atomic_min_max) + +#if defined(__opencl_c_ext_fp32_global_atomic_add) +float __ovld atomic_fetch_add(volatile __global atomic_float *object, + float operand); +float __ovld atomic_fetch_sub(volatile __global atomic_float *object, + float operand); +float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object, + float operand, memory_order order, + memory_scope scope); +float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object, + float operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_global_atomic_add) + +#if defined(__opencl_c_ext_fp32_local_atomic_add) +float __ovld atomic_fetch_add(volatile __local atomic_float *object, + float operand); +float __ovld atomic_fetch_sub(volatile __local atomic_float *object, + float operand); +float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object, + float operand, memory_order order, + memory_scope scope); +float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object, + float operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_local_atomic_add) + +#if defined(__opencl_c_ext_fp32_global_atomic_add) || \ + defined(__opencl_c_ext_fp32_local_atomic_add) +float __ovld atomic_fetch_add(volatile atomic_float *object, float operand); +float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand); +float __ovld atomic_fetch_add_explicit(volatile atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_add_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); +float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_global_atomic_add) || \ + defined(__opencl_c_ext_fp32_local_atomic_add) + +#if defined(__opencl_c_ext_fp64_global_atomic_add) +double __ovld atomic_fetch_add(volatile __global atomic_double *object, + double operand); +double __ovld atomic_fetch_sub(volatile __global atomic_double *object, + double operand); +double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object, + double operand, memory_order order, + memory_scope scope); +double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object, + double operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_global_atomic_add) + +#if defined(__opencl_c_ext_fp64_local_atomic_add) +double __ovld atomic_fetch_add(volatile __local atomic_double *object, + double operand); +double __ovld atomic_fetch_sub(volatile __local atomic_double *object, + double operand); +double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object, + double operand, memory_order order, + memory_scope scope); +double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object, + double operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_local_atomic_add) + +#if defined(__opencl_c_ext_fp64_global_atomic_add) || \ + defined(__opencl_c_ext_fp64_local_atomic_add) +double __ovld atomic_fetch_add(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_add_explicit(volatile atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_add_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); +double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_global_atomic_add) || \ + defined(__opencl_c_ext_fp64_local_atomic_add) + +#endif // cl_ext_float_atomics + // atomic_store() #if defined(__opencl_c_atomic_scope_device) && \ diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl index 2716076acdcf..7f720cf28142 100644 --- a/clang/test/Headers/opencl-c-header.cl +++ b/clang/test/Headers/opencl-c-header.cl @@ -98,3 +98,99 @@ global atomic_int z = ATOMIC_VAR_INIT(99); #pragma OPENCL EXTENSION cl_intel_planar_yuv : enable // CHECK-MOD: Reading modules + +// For SPIR all extensions are supported. +#if defined(__SPIR__) + +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) + +#if __opencl_c_ext_fp16_global_atomic_load_store != 1 +#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_load_store" +#endif +#if __opencl_c_ext_fp16_local_atomic_load_store != 1 +#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_load_store" +#endif +#if __opencl_c_ext_fp16_global_atomic_add != 1 +#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_add" +#endif +#if __opencl_c_ext_fp32_global_atomic_add != 1 +#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_add" +#endif +#if __opencl_c_ext_fp64_global_atomic_add != 1 +#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_add" +#endif +#if __opencl_c_ext_fp16_local_atomic_add != 1 +#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_add" +#endif +#if __opencl_c_ext_fp32_local_atomic_add != 1 +#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_add" +#endif +#if __opencl_c_ext_fp64_local_atomic_add != 1 +#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_add" +#endif +#if __opencl_c_ext_fp16_global_atomic_min_max != 1 +#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_min_max" +#endif +#if __opencl_c_ext_fp32_global_atomic_min_max != 1 +#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_min_max" +#endif +#if __opencl_c_ext_fp64_global_atomic_min_max != 1 +#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_min_max" +#endif +#if __opencl_c_ext_fp16_local_atomic_min_max != 1 +#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_min_max" +#endif +#if __opencl_c_ext_fp32_local_atomic_min_max != 1 +#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_min_max" +#endif +#if __opencl_c_ext_fp64_local_atomic_min_max != 1 +#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_min_max" +#endif +#else + +#ifdef __opencl_c_ext_fp16_global_atomic_load_store +#error "Incorrectly __opencl_c_ext_fp16_global_atomic_load_store defined" +#endif +#ifdef __opencl_c_ext_fp16_local_atomic_load_store +#error "Incorrectly __opencl_c_ext_fp16_local_atomic_load_store defined" +#endif +#ifdef __opencl_c_ext_fp16_global_atomic_add +#error "Incorrectly __opencl_c_ext_fp16_global_atomic_add defined" +#endif +#ifdef __opencl_c_ext_fp32_global_atomic_add +#error "Incorrectly __opencl_c_ext_fp32_global_atomic_add defined" +#endif +#ifdef __opencl_c_ext_fp64_global_atomic_add +#error "Incorrectly __opencl_c_ext_fp64_global_atomic_add defined" +#endif +#ifdef __opencl_c_ext_fp16_local_atomic_add +#error "Incorrectly __opencl_c_ext_fp16_local_atomic_add defined" +#endif +#ifdef __opencl_c_ext_fp32_local_atomic_add +#error "Incorrectly __opencl_c_ext_fp32_local_atomic_add defined" +#endif +#ifdef __opencl_c_ext_fp64_local_atomic_add +#error "Incorrectly __opencl_c_ext_fp64_local_atomic_add defined" +#endif +#ifdef __opencl_c_ext_fp16_global_atomic_min_max +#error "Incorrectly __opencl_c_ext_fp16_global_atomic_min_max defined" +#endif +#ifdef __opencl_c_ext_fp32_global_atomic_min_max +#error "Incorrectly __opencl_c_ext_fp32_global_atomic_min_max defined" +#endif +#ifdef __opencl_c_ext_fp64_global_atomic_min_max +#error "Incorrectly __opencl_c_ext_fp64_global_atomic_min_max defined" +#endif +#ifdef __opencl_c_ext_fp16_local_atomic_min_max +#error "Incorrectly __opencl_c_ext_fp16_local_atomic_min_max defined" +#endif +#ifdef __opencl_c_ext_fp32_local_atomic_min_max +#error "Incorrectly __opencl_c_ext_fp32_local_atomic_min_max defined" +#endif +#ifdef __opencl_c_ext_fp64_local_atomic_min_max +#error "Incorrectly __opencl_c_ext_fp64_local_atomic_min_max defined" +#endif + +#endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) + +#endif // defined(__SPIR__) -- 2.17.1