diff options
9 files changed, 1788 insertions, 0 deletions
diff --git a/conf/layer.conf b/conf/layer.conf index 3e75b0fe..4cea5500 100644 --- a/conf/layer.conf +++ b/conf/layer.conf | |||
| @@ -20,5 +20,9 @@ LAYERRECOMMENDS_intel = "dpdk intel-qat" | |||
| 20 | LAYERVERSION_intel = "5" | 20 | LAYERVERSION_intel = "5" |
| 21 | LAYERSERIES_COMPAT_intel = "thud warrior" | 21 | LAYERSERIES_COMPAT_intel = "thud warrior" |
| 22 | 22 | ||
| 23 | BBFILES_DYNAMIC += " \ | ||
| 24 | clang-layer:${LAYERDIR}/dynamic-layers/clang-layer/*/*/*.bb \ | ||
| 25 | clang-layer:${LAYERDIR}/dynamic-layers/clang-layer/*/*/*.bbappend \ | ||
| 26 | " | ||
| 23 | 27 | ||
| 24 | require ${LAYERDIR}/conf/include/maintainers.inc | 28 | require ${LAYERDIR}/conf/include/maintainers.inc |
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend new file mode 100644 index 00000000..f8d5a252 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:" | ||
| 2 | |||
| 3 | DEPENDS_append = " opencl-clang-native" | ||
| 4 | LLVM_TARGETS_TO_BUILD = "X86" | ||
| 5 | |||
| 6 | do_install_append_intel-x86-common() { | ||
| 7 | DESTDIR=${D} ninja -v install-cmake-exports | ||
| 8 | } | ||
| 9 | |||
| 10 | LIBCPLUSPLUS = "" | ||
| 11 | |||
| 12 | # undefined reference to `__atomic_load' on i*86. | ||
| 13 | COMPATIBLE_HOST = '(x86_64).*-linux' | ||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch new file mode 100644 index 00000000..1c491402 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch | |||
| @@ -0,0 +1,156 @@ | |||
| 1 | From 39a3ac0065c23d1e2d55dfd8792cc28a146a4307 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Alexey Bader <alexey.bader@intel.com> | ||
| 3 | Date: Tue, 19 Feb 2019 15:19:06 +0000 | ||
| 4 | Subject: [PATCH 1/2] [OpenCL] Change type of block pointer for OpenCL | ||
| 5 | |||
| 6 | Summary: | ||
| 7 | |||
| 8 | For some reason OpenCL blocks in LLVM IR are represented as function pointers. | ||
| 9 | These pointers do not point to any real function and never get called. Actually | ||
| 10 | they point to some structure, which in turn contains pointer to the real block | ||
| 11 | invoke function. | ||
| 12 | This patch changes representation of OpenCL blocks in LLVM IR from function | ||
| 13 | pointers to pointers to `%struct.__block_literal_generic`. | ||
| 14 | Such representation allows to avoid unnecessary bitcasts and simplifies | ||
| 15 | further processing (e.g. translation to SPIR-V ) of the module for targets | ||
| 16 | which do not support function pointers. | ||
| 17 | |||
| 18 | Patch by: Alexey Sotkin. | ||
| 19 | |||
| 20 | Reviewers: Anastasia, yaxunl, svenvh | ||
| 21 | |||
| 22 | Reviewed By: Anastasia | ||
| 23 | |||
| 24 | Subscribers: alexbatashev, cfe-commits | ||
| 25 | |||
| 26 | Tags: #clang | ||
| 27 | |||
| 28 | Differential Revision: https://reviews.llvm.org/D58277 | ||
| 29 | |||
| 30 | git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354337 91177308-0d34-0410-b5e6-96231b3b80d8 | ||
| 31 | |||
| 32 | Upstream-Status: Backport | ||
| 33 | [https://github.com/llvm-mirror/clang/commit/283f308bdb5893bab1f36791711346e746045f94] | ||
| 34 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
| 35 | --- | ||
| 36 | lib/CodeGen/CodeGenTypes.cpp | 4 +++- | ||
| 37 | test/CodeGenOpenCL/blocks.cl | 18 ++++++++---------- | ||
| 38 | test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 18 +++++++++--------- | ||
| 39 | 3 files changed, 20 insertions(+), 20 deletions(-) | ||
| 40 | |||
| 41 | diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp | ||
| 42 | index 2acf1ac..93b3ebf 100644 | ||
| 43 | --- a/lib/CodeGen/CodeGenTypes.cpp | ||
| 44 | +++ b/lib/CodeGen/CodeGenTypes.cpp | ||
| 45 | @@ -637,7 +637,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { | ||
| 46 | |||
| 47 | case Type::BlockPointer: { | ||
| 48 | const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType(); | ||
| 49 | - llvm::Type *PointeeType = ConvertTypeForMem(FTy); | ||
| 50 | + llvm::Type *PointeeType = CGM.getLangOpts().OpenCL | ||
| 51 | + ? CGM.getGenericBlockLiteralType() | ||
| 52 | + : ConvertTypeForMem(FTy); | ||
| 53 | unsigned AS = Context.getTargetAddressSpace(FTy); | ||
| 54 | ResultType = llvm::PointerType::get(PointeeType, AS); | ||
| 55 | break; | ||
| 56 | diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl | ||
| 57 | index 675240c..19aacc3 100644 | ||
| 58 | --- a/test/CodeGenOpenCL/blocks.cl | ||
| 59 | +++ b/test/CodeGenOpenCL/blocks.cl | ||
| 60 | @@ -35,11 +35,10 @@ void foo(){ | ||
| 61 | // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3 | ||
| 62 | // SPIR: %[[i_value:.*]] = load i32, i32* %i | ||
| 63 | // SPIR: store i32 %[[i_value]], i32* %[[block_captured]], | ||
| 64 | - // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()* | ||
| 65 | - // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)* | ||
| 66 | - // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]], | ||
| 67 | - // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]] | ||
| 68 | - // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* | ||
| 69 | + // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic* | ||
| 70 | + // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* | ||
| 71 | + // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]], | ||
| 72 | + // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]] | ||
| 73 | // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 | ||
| 74 | // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* | ||
| 75 | // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]] | ||
| 76 | @@ -50,11 +49,10 @@ void foo(){ | ||
| 77 | // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3 | ||
| 78 | // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i | ||
| 79 | // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]], | ||
| 80 | - // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)* | ||
| 81 | - // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()* | ||
| 82 | - // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]], | ||
| 83 | - // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]] | ||
| 84 | - // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic* | ||
| 85 | + // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)* | ||
| 86 | + // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic* | ||
| 87 | + // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]], | ||
| 88 | + // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]] | ||
| 89 | // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2 | ||
| 90 | // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8* | ||
| 91 | // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]] | ||
| 92 | diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
| 93 | index 4732194..8445016 100644 | ||
| 94 | --- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
| 95 | +++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
| 96 | @@ -11,7 +11,7 @@ typedef struct {int a;} ndrange_t; | ||
| 97 | |||
| 98 | // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself. | ||
| 99 | // COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) } | ||
| 100 | -// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*) | ||
| 101 | +// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*) | ||
| 102 | |||
| 103 | // For anonymous blocks without captures, emit block literals as global variable. | ||
| 104 | // COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } | ||
| 105 | @@ -77,9 +77,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 106 | // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue | ||
| 107 | // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags | ||
| 108 | // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke | ||
| 109 | - // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()* | ||
| 110 | - // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()* | ||
| 111 | - // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* | ||
| 112 | + // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic* | ||
| 113 | + // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic* | ||
| 114 | + // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)* | ||
| 115 | // COMMON-LABEL: call i32 @__enqueue_kernel_basic( | ||
| 116 | // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, | ||
| 117 | // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), | ||
| 118 | @@ -95,8 +95,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 119 | // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)* | ||
| 120 | // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* | ||
| 121 | // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke | ||
| 122 | - // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()* | ||
| 123 | - // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* | ||
| 124 | + // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic* | ||
| 125 | + // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)* | ||
| 126 | // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events | ||
| 127 | // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], | ||
| 128 | // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), | ||
| 129 | @@ -300,13 +300,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 130 | // Emits global block literal [[BLG8]] and invoke function [[INVG8]]. | ||
| 131 | // The full type of these expressions are long (and repeated elsewhere), so we | ||
| 132 | // capture it as part of the regex for convenience and clarity. | ||
| 133 | - // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A | ||
| 134 | + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A | ||
| 135 | void (^const block_A)(void) = ^{ | ||
| 136 | return; | ||
| 137 | }; | ||
| 138 | |||
| 139 | // Emits global block literal [[BLG9]] and invoke function [[INVG9]]. | ||
| 140 | - // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B | ||
| 141 | + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B | ||
| 142 | void (^const block_B)(local void *) = ^(local void *a) { | ||
| 143 | return; | ||
| 144 | }; | ||
| 145 | @@ -346,7 +346,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 146 | // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke | ||
| 147 | // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue | ||
| 148 | // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags | ||
| 149 | - // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)* | ||
| 150 | + // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)* | ||
| 151 | // COMMON-LABEL: call i32 @__enqueue_kernel_basic( | ||
| 152 | // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, | ||
| 153 | // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), | ||
| 154 | -- | ||
| 155 | 1.8.3.1 | ||
| 156 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch new file mode 100644 index 00000000..4a528674 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch | |||
| @@ -0,0 +1,1119 @@ | |||
| 1 | From 9ce0fe02fd6cda5fb29fbb0d5037a1798a810b8a Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Alexey Sotkin <alexey.sotkin@intel.com> | ||
| 3 | Date: Thu, 21 Feb 2019 17:14:36 +0300 | ||
| 4 | Subject: [PATCH 1/3] Update LowerOpenCL pass to handle new blocks | ||
| 5 | representation in LLVM IR | ||
| 6 | |||
| 7 | Upstream-Status: Backport | ||
| 8 | [https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/bd6ddfaf7232cd81c7f2fe9877e66f286731bd8e] | ||
| 9 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
| 10 | --- | ||
| 11 | lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 413 ++++++++---------------------- | ||
| 12 | test/global_block.ll | 71 ++--- | ||
| 13 | test/literal-struct.ll | 31 ++- | ||
| 14 | test/transcoding/block_w_struct_return.ll | 47 ++-- | ||
| 15 | test/transcoding/enqueue_kernel.ll | 237 ++++++++++------- | ||
| 16 | 5 files changed, 317 insertions(+), 482 deletions(-) | ||
| 17 | |||
| 18 | diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp | ||
| 19 | index 50e1838..b42a4ec 100644 | ||
| 20 | --- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp | ||
| 21 | +++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp | ||
| 22 | @@ -1,303 +1,110 @@ | ||
| 23 | -//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===// | ||
| 24 | -// | ||
| 25 | -// The LLVM/SPIRV Translator | ||
| 26 | -// | ||
| 27 | -// This file is distributed under the University of Illinois Open Source | ||
| 28 | -// License. See LICENSE.TXT for details. | ||
| 29 | -// | ||
| 30 | -// Copyright (c) 2018 Intel Corporation. All rights reserved. | ||
| 31 | -// | ||
| 32 | -// Permission is hereby granted, free of charge, to any person obtaining a | ||
| 33 | -// copy of this software and associated documentation files (the "Software"), | ||
| 34 | -// to deal with the Software without restriction, including without limitation | ||
| 35 | -// the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 36 | -// and/or sell copies of the Software, and to permit persons to whom the | ||
| 37 | -// Software is furnished to do so, subject to the following conditions: | ||
| 38 | -// | ||
| 39 | -// Redistributions of source code must retain the above copyright notice, | ||
| 40 | -// this list of conditions and the following disclaimers. | ||
| 41 | -// Redistributions in binary form must reproduce the above copyright notice, | ||
| 42 | -// this list of conditions and the following disclaimers in the documentation | ||
| 43 | -// and/or other materials provided with the distribution. | ||
| 44 | -// Neither the names of Intel Corporation, nor the names of its | ||
| 45 | -// contributors may be used to endorse or promote products derived from this | ||
| 46 | -// Software without specific prior written permission. | ||
| 47 | -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 48 | -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 49 | -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| 50 | -// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 51 | -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 52 | -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH | ||
| 53 | -// THE SOFTWARE. | ||
| 54 | -// | ||
| 55 | -//===----------------------------------------------------------------------===// | ||
| 56 | -// | ||
| 57 | -// SPIR-V specification doesn't allow function pointers, so SPIR-V translator | ||
| 58 | -// is designed to fail if a value with function type (except calls) is occured. | ||
| 59 | -// Currently there is only two cases, when function pointers are generating in | ||
| 60 | -// LLVM IR in OpenCL - block calls and device side enqueue built-in calls. | ||
| 61 | -// | ||
| 62 | -// In both cases values with function type used as intermediate representation | ||
| 63 | -// for block literal structure. | ||
| 64 | -// | ||
| 65 | -// This pass is designed to find such cases and simplify them to avoid any | ||
| 66 | -// function pointer types occurrences in LLVM IR in 4 steps. | ||
| 67 | -// | ||
| 68 | -// 1. Find all function pointer allocas, like | ||
| 69 | -// %block = alloca void () * | ||
| 70 | -// | ||
| 71 | -// Then find a single store to that alloca: | ||
| 72 | -// %blockLit = alloca <{ i32, i32, ...}>, align 4 | ||
| 73 | -// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()* | ||
| 74 | -// > store void ()* %0, void ()** %block, align 4 | ||
| 75 | -// | ||
| 76 | -// And replace the alloca users by new instructions which used stored value | ||
| 77 | -// %blockLit itself instead of function pointer alloca %block. | ||
| 78 | -// | ||
| 79 | -// 2. Find consecutive casts from block literal type to i8 addrspace(4)* | ||
| 80 | -// used function pointers as an intermediate type: | ||
| 81 | -// %0 = bitcast <{ i32, i32 }> %block to void() * | ||
| 82 | -// %1 = addrspacecast void() * %0 to i8 addrspace(4)* | ||
| 83 | -// And simplify them: | ||
| 84 | -// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)* | ||
| 85 | -// | ||
| 86 | -// 3. Find all unused instructions with function pointer type occured after | ||
| 87 | -// pp.1-2 and remove them. | ||
| 88 | -// | ||
| 89 | -// 4. Find unused globals with function pointer type, like | ||
| 90 | -// @block = constant void ()* | ||
| 91 | -// bitcast ({ i32, i32 }* @__block_literal_global to void ()* | ||
| 92 | -// | ||
| 93 | -// And remove them. | ||
| 94 | -// | ||
| 95 | -//===----------------------------------------------------------------------===// | ||
| 96 | -#define DEBUG_TYPE "spv-lower-ocl-blocks" | ||
| 97 | - | ||
| 98 | -#include "OCLUtil.h" | ||
| 99 | -#include "SPIRVInternal.h" | ||
| 100 | - | ||
| 101 | -#include "llvm/ADT/SetVector.h" | ||
| 102 | -#include "llvm/Analysis/ValueTracking.h" | ||
| 103 | -#include "llvm/IR/GlobalVariable.h" | ||
| 104 | -#include "llvm/IR/InstIterator.h" | ||
| 105 | -#include "llvm/IR/Module.h" | ||
| 106 | -#include "llvm/Pass.h" | ||
| 107 | -#include "llvm/PassSupport.h" | ||
| 108 | -#include "llvm/Support/Casting.h" | ||
| 109 | - | ||
| 110 | -using namespace llvm; | ||
| 111 | - | ||
| 112 | -namespace { | ||
| 113 | - | ||
| 114 | -static void | ||
| 115 | -removeUnusedFunctionPtrInst(Instruction *I, | ||
| 116 | - SmallSetVector<Instruction *, 16> &FuncPtrInsts) { | ||
| 117 | - for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) { | ||
| 118 | - Instruction *OpI = dyn_cast<Instruction>(I->getOperand(OpIdx)); | ||
| 119 | - I->setOperand(OpIdx, nullptr); | ||
| 120 | - if (OpI && OpI != I && OpI->user_empty()) | ||
| 121 | - FuncPtrInsts.insert(OpI); | ||
| 122 | - } | ||
| 123 | - I->eraseFromParent(); | ||
| 124 | -} | ||
| 125 | - | ||
| 126 | -static bool isFuncPtrAlloca(const AllocaInst *AI) { | ||
| 127 | - auto *ET = dyn_cast<PointerType>(AI->getAllocatedType()); | ||
| 128 | - return ET && ET->getElementType()->isFunctionTy(); | ||
| 129 | -} | ||
| 130 | - | ||
| 131 | -static bool hasFuncPtrType(const Value *V) { | ||
| 132 | - auto *PT = dyn_cast<PointerType>(V->getType()); | ||
| 133 | - return PT && PT->getElementType()->isFunctionTy(); | ||
| 134 | -} | ||
| 135 | - | ||
| 136 | -static bool isFuncPtrInst(const Instruction *I) { | ||
| 137 | - if (auto *AI = dyn_cast<AllocaInst>(I)) | ||
| 138 | - return isFuncPtrAlloca(AI); | ||
| 139 | - | ||
| 140 | - for (auto &Op : I->operands()) { | ||
| 141 | - if (auto *AI = dyn_cast<AllocaInst>(Op)) | ||
| 142 | - return isFuncPtrAlloca(AI); | ||
| 143 | - | ||
| 144 | - auto *OpI = dyn_cast<Instruction>(&Op); | ||
| 145 | - if (OpI && OpI != I && hasFuncPtrType(OpI)) | ||
| 146 | - return true; | ||
| 147 | - } | ||
| 148 | - return false; | ||
| 149 | -} | ||
| 150 | - | ||
| 151 | -static StoreInst *findSingleStore(AllocaInst *AI) { | ||
| 152 | - StoreInst *Store = nullptr; | ||
| 153 | - for (auto *U : AI->users()) { | ||
| 154 | - if (!isa<StoreInst>(U)) | ||
| 155 | - continue; // not a store | ||
| 156 | - if (Store) | ||
| 157 | - return nullptr; // there are more than one stores | ||
| 158 | - Store = dyn_cast<StoreInst>(U); | ||
| 159 | - } | ||
| 160 | - return Store; | ||
| 161 | -} | ||
| 162 | - | ||
| 163 | -static void fixFunctionPtrAllocaUsers(AllocaInst *AI) { | ||
| 164 | - // Find and remove a single store to alloca | ||
| 165 | - auto *SingleStore = findSingleStore(AI); | ||
| 166 | - assert(SingleStore && "More than one store to the function pointer alloca"); | ||
| 167 | - auto *StoredVal = SingleStore->getValueOperand(); | ||
| 168 | - SingleStore->eraseFromParent(); | ||
| 169 | - | ||
| 170 | - // Find loads from the alloca and replace thier users | ||
| 171 | - for (auto *U : AI->users()) { | ||
| 172 | - auto *LI = dyn_cast<LoadInst>(U); | ||
| 173 | - if (!LI) | ||
| 174 | - continue; | ||
| 175 | - | ||
| 176 | - for (auto *U : LI->users()) { | ||
| 177 | - auto *UInst = cast<Instruction>(U); | ||
| 178 | - auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast( | ||
| 179 | - StoredVal, UInst->getType(), "", UInst); | ||
| 180 | - UInst->replaceAllUsesWith(Cast); | ||
| 181 | - } | ||
| 182 | - } | ||
| 183 | -} | ||
| 184 | - | ||
| 185 | -static int getBlockLiteralIdx(const Function &F) { | ||
| 186 | - StringRef FName = F.getName(); | ||
| 187 | - if (isEnqueueKernelBI(FName)) | ||
| 188 | - return FName.contains("events") ? 7 : 4; | ||
| 189 | - if (isKernelQueryBI(FName)) | ||
| 190 | - return FName.contains("for_ndrange") ? 2 : 1; | ||
| 191 | - if (FName.startswith("__") && FName.contains("_block_invoke")) | ||
| 192 | - return F.hasStructRetAttr() ? 1 : 0; | ||
| 193 | - | ||
| 194 | - return -1; // No block literal argument | ||
| 195 | -} | ||
| 196 | - | ||
| 197 | -static bool hasBlockLiteralArg(const Function &F) { | ||
| 198 | - return getBlockLiteralIdx(F) != -1; | ||
| 199 | -} | ||
| 200 | - | ||
| 201 | -static bool simplifyFunctionPtrCasts(Function &F) { | ||
| 202 | - bool Changed = false; | ||
| 203 | - int BlockLiteralIdx = getBlockLiteralIdx(F); | ||
| 204 | - for (auto *U : F.users()) { | ||
| 205 | - auto *Call = dyn_cast<CallInst>(U); | ||
| 206 | - if (!Call) | ||
| 207 | - continue; | ||
| 208 | - if (Call->getFunction()->getName() == F.getName().str() + "_kernel") | ||
| 209 | - continue; // Skip block invoke function calls inside block invoke kernels | ||
| 210 | - | ||
| 211 | - const DataLayout &DL = F.getParent()->getDataLayout(); | ||
| 212 | - auto *BlockLiteral = Call->getOperand(BlockLiteralIdx); | ||
| 213 | - auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL); | ||
| 214 | - if (isa<GlobalVariable>(BlockLiteralVal)) | ||
| 215 | - continue; // nothing to do with globals | ||
| 216 | - | ||
| 217 | - auto *BlockLiteralAlloca = cast<AllocaInst>(BlockLiteralVal); | ||
| 218 | - assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() && | ||
| 219 | - "Function type shouldn't be there"); | ||
| 220 | - | ||
| 221 | - auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast( | ||
| 222 | - BlockLiteralAlloca, BlockLiteral->getType(), "", Call); | ||
| 223 | - BlockLiteral->replaceAllUsesWith(NewBlockLiteral); | ||
| 224 | - Changed |= true; | ||
| 225 | - } | ||
| 226 | - return Changed; | ||
| 227 | -} | ||
| 228 | - | ||
| 229 | -static void | ||
| 230 | -findFunctionPtrAllocas(Module &M, | ||
| 231 | - SmallVectorImpl<AllocaInst *> &FuncPtrAllocas) { | ||
| 232 | - for (auto &F : M) { | ||
| 233 | - if (F.isDeclaration()) | ||
| 234 | - continue; | ||
| 235 | - for (auto &I : instructions(F)) { | ||
| 236 | - auto *AI = dyn_cast<AllocaInst>(&I); | ||
| 237 | - if (!AI || !isFuncPtrAlloca(AI)) | ||
| 238 | - continue; | ||
| 239 | - FuncPtrAllocas.push_back(AI); | ||
| 240 | - } | ||
| 241 | - } | ||
| 242 | -} | ||
| 243 | - | ||
| 244 | -static void | ||
| 245 | -findUnusedFunctionPtrInsts(Module &M, | ||
| 246 | - SmallSetVector<Instruction *, 16> &FuncPtrInsts) { | ||
| 247 | - for (auto &F : M) { | ||
| 248 | - if (F.isDeclaration()) | ||
| 249 | - continue; | ||
| 250 | - for (auto &I : instructions(F)) | ||
| 251 | - if (I.user_empty() && isFuncPtrInst(&I)) | ||
| 252 | - FuncPtrInsts.insert(&I); | ||
| 253 | - } | ||
| 254 | -} | ||
| 255 | - | ||
| 256 | -static void | ||
| 257 | -findUnusedFunctionPtrGlbs(Module &M, | ||
| 258 | - SmallVectorImpl<GlobalVariable *> &FuncPtrGlbs) { | ||
| 259 | - for (auto &GV : M.globals()) { | ||
| 260 | - if (!GV.user_empty()) | ||
| 261 | - continue; | ||
| 262 | - auto *GVType = dyn_cast<PointerType>(GV.getType()->getElementType()); | ||
| 263 | - if (GVType && GVType->getElementType()->isFunctionTy()) | ||
| 264 | - FuncPtrGlbs.push_back(&GV); | ||
| 265 | - } | ||
| 266 | -} | ||
| 267 | - | ||
| 268 | -class SPIRVLowerOCLBlocks : public ModulePass { | ||
| 269 | - | ||
| 270 | -public: | ||
| 271 | - SPIRVLowerOCLBlocks() : ModulePass(ID) {} | ||
| 272 | - | ||
| 273 | - bool runOnModule(Module &M) { | ||
| 274 | - bool Changed = false; | ||
| 275 | - | ||
| 276 | - // 1. Find function pointer allocas and fix their users | ||
| 277 | - SmallVector<AllocaInst *, 16> FuncPtrAllocas; | ||
| 278 | - findFunctionPtrAllocas(M, FuncPtrAllocas); | ||
| 279 | - | ||
| 280 | - Changed |= !FuncPtrAllocas.empty(); | ||
| 281 | - for (auto *AI : FuncPtrAllocas) | ||
| 282 | - fixFunctionPtrAllocaUsers(AI); | ||
| 283 | - | ||
| 284 | - // 2. Simplify consecutive casts which use function pointer types | ||
| 285 | - for (auto &F : M) | ||
| 286 | - if (hasBlockLiteralArg(F)) | ||
| 287 | - Changed |= simplifyFunctionPtrCasts(F); | ||
| 288 | - | ||
| 289 | - // 3. Cleanup unused instructions with function pointer type | ||
| 290 | - // which are occured after pp. 1-2 | ||
| 291 | - SmallSetVector<Instruction *, 16> FuncPtrInsts; | ||
| 292 | - findUnusedFunctionPtrInsts(M, FuncPtrInsts); | ||
| 293 | - | ||
| 294 | - Changed |= !FuncPtrInsts.empty(); | ||
| 295 | - while (!FuncPtrInsts.empty()) { | ||
| 296 | - Instruction *I = FuncPtrInsts.pop_back_val(); | ||
| 297 | - removeUnusedFunctionPtrInst(I, FuncPtrInsts); | ||
| 298 | - } | ||
| 299 | - | ||
| 300 | - // 4. Find and remove unused global variables with function pointer type | ||
| 301 | - SmallVector<GlobalVariable *, 16> FuncPtrGlbs; | ||
| 302 | - findUnusedFunctionPtrGlbs(M, FuncPtrGlbs); | ||
| 303 | - | ||
| 304 | - Changed |= !FuncPtrGlbs.empty(); | ||
| 305 | - for (auto *GV : FuncPtrGlbs) | ||
| 306 | - GV->eraseFromParent(); | ||
| 307 | - | ||
| 308 | - return Changed; | ||
| 309 | - } | ||
| 310 | - | ||
| 311 | - static char ID; | ||
| 312 | -}; // class SPIRVLowerOCLBlocks | ||
| 313 | - | ||
| 314 | -char SPIRVLowerOCLBlocks::ID = 0; | ||
| 315 | - | ||
| 316 | -} // namespace | ||
| 317 | - | ||
| 318 | -INITIALIZE_PASS( | ||
| 319 | - SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks", | ||
| 320 | - "Remove function pointers occured in case of using OpenCL blocks", false, | ||
| 321 | - false) | ||
| 322 | - | ||
| 323 | -llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() { | ||
| 324 | - return new SPIRVLowerOCLBlocks(); | ||
| 325 | -} | ||
| 326 | +//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===// | ||
| 327 | +// | ||
| 328 | +// The LLVM/SPIRV Translator | ||
| 329 | +// | ||
| 330 | +// This file is distributed under the University of Illinois Open Source | ||
| 331 | +// License. See LICENSE.TXT for details. | ||
| 332 | +// | ||
| 333 | +// Copyright (c) 2018 Intel Corporation. All rights reserved. | ||
| 334 | +// | ||
| 335 | +// Permission is hereby granted, free of charge, to any person obtaining a | ||
| 336 | +// copy of this software and associated documentation files (the "Software"), | ||
| 337 | +// to deal with the Software without restriction, including without limitation | ||
| 338 | +// the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
| 339 | +// and/or sell copies of the Software, and to permit persons to whom the | ||
| 340 | +// Software is furnished to do so, subject to the following conditions: | ||
| 341 | +// | ||
| 342 | +// Redistributions of source code must retain the above copyright notice, | ||
| 343 | +// this list of conditions and the following disclaimers. | ||
| 344 | +// Redistributions in binary form must reproduce the above copyright notice, | ||
| 345 | +// this list of conditions and the following disclaimers in the documentation | ||
| 346 | +// and/or other materials provided with the distribution. | ||
| 347 | +// Neither the names of Intel Corporation, nor the names of its | ||
| 348 | +// contributors may be used to endorse or promote products derived from this | ||
| 349 | +// Software without specific prior written permission. | ||
| 350 | +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| 351 | +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| 352 | +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| 353 | +// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| 354 | +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 355 | +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH | ||
| 356 | +// THE SOFTWARE. | ||
| 357 | +// | ||
| 358 | +//===----------------------------------------------------------------------===// | ||
| 359 | +// | ||
| 360 | +// SPIR-V specification doesn't allow function pointers, so SPIR-V translator | ||
| 361 | +// is designed to fail if a value with function type (except calls) occurs. | ||
| 362 | +// Currently there are only two cases where function pointers are generated in | ||
| 363 | +// LLVM IR in OpenCL - block calls and device side enqueue built-in calls. | ||
| 364 | +// | ||
| 365 | +// In both cases values with function type are used as an intermediate | ||
| 366 | +// representation for the block literal structure. | ||
| 367 | +// | ||
| 368 | +// In LLVM IR produced by clang, blocks are represented with the following | ||
| 369 | +// structure: | ||
| 370 | +// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
| 371 | +// Pointers to block invoke functions are stored in the third field. Clang | ||
| 372 | +// replaces indirect function calls in all cases except if block is passed as a | ||
| 373 | +// function argument. Note that it is somewhat unclear if the OpenCL C spec | ||
| 374 | +// should allow passing blocks as function arguments. This pass is not supposed | ||
| 375 | +// to work correctly with such functions. | ||
| 376 | +// Clang, though, has to store function pointers in this structure. The purpose | ||
| 377 | +// of this pass is to replace stores of function pointers (not allowed in | ||
| 378 | +// SPIR-V) with null pointers. | ||
| 379 | +// | ||
| 380 | +//===----------------------------------------------------------------------===// | ||
| 381 | +#define DEBUG_TYPE "spv-lower-ocl-blocks" | ||
| 382 | + | ||
| 383 | +#include "SPIRVInternal.h" | ||
| 384 | + | ||
| 385 | +#include "llvm/IR/Module.h" | ||
| 386 | +#include "llvm/Pass.h" | ||
| 387 | +#include "llvm/Support/Regex.h" | ||
| 388 | + | ||
| 389 | +using namespace llvm; | ||
| 390 | + | ||
| 391 | +namespace { | ||
| 392 | + | ||
| 393 | +static bool isBlockInvoke(Function &F) { | ||
| 394 | + static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$"); | ||
| 395 | + return BlockInvokeRegex.match(F.getName()); | ||
| 396 | +} | ||
| 397 | + | ||
| 398 | +class SPIRVLowerOCLBlocks : public ModulePass { | ||
| 399 | + | ||
| 400 | +public: | ||
| 401 | + SPIRVLowerOCLBlocks() : ModulePass(ID) {} | ||
| 402 | + | ||
| 403 | + bool runOnModule(Module &M) { | ||
| 404 | + bool Changed = false; | ||
| 405 | + for (Function &F : M) { | ||
| 406 | + if (!isBlockInvoke(F)) | ||
| 407 | + continue; | ||
| 408 | + for (User *U : F.users()) { | ||
| 409 | + if (!isa<Constant>(U)) | ||
| 410 | + continue; | ||
| 411 | + Constant *Null = Constant::getNullValue(U->getType()); | ||
| 412 | + if (U != Null) { | ||
| 413 | + U->replaceAllUsesWith(Null); | ||
| 414 | + Changed = true; | ||
| 415 | + } | ||
| 416 | + } | ||
| 417 | + } | ||
| 418 | + return Changed; | ||
| 419 | + } | ||
| 420 | + | ||
| 421 | + static char ID; | ||
| 422 | +}; | ||
| 423 | + | ||
| 424 | +char SPIRVLowerOCLBlocks::ID = 0; | ||
| 425 | + | ||
| 426 | +} // namespace | ||
| 427 | + | ||
| 428 | +INITIALIZE_PASS( | ||
| 429 | + SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks", | ||
| 430 | + "Remove function pointers occured in case of using OpenCL blocks", false, | ||
| 431 | + false) | ||
| 432 | + | ||
| 433 | +llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() { | ||
| 434 | + return new SPIRVLowerOCLBlocks(); | ||
| 435 | +} | ||
| 436 | diff --git a/test/global_block.ll b/test/global_block.ll | ||
| 437 | index a9267d8..efb4cf3 100644 | ||
| 438 | --- a/test/global_block.ll | ||
| 439 | +++ b/test/global_block.ll | ||
| 440 | @@ -16,7 +16,7 @@ | ||
| 441 | ; RUN: llvm-spirv %t.bc -o %t.spv | ||
| 442 | ; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM | ||
| 443 | |||
| 444 | -target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" | ||
| 445 | +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
| 446 | target triple = "spir-unknown-unknown" | ||
| 447 | |||
| 448 | ; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke" | ||
| 449 | @@ -26,71 +26,56 @@ target triple = "spir-unknown-unknown" | ||
| 450 | ; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]] | ||
| 451 | ; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] [[int]] | ||
| 452 | |||
| 453 | -;; This variable is not needed in SPIRV | ||
| 454 | -; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1 | ||
| 455 | -; CHECK-LLVM-NOT: @block_kernel.b1 | ||
| 456 | -@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) addrspace(4)*), align 8 | ||
| 457 | +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
| 458 | |||
| 459 | -@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
| 460 | +@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4 | ||
| 461 | +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 | ||
| 462 | |||
| 463 | -; Function Attrs: convergent nounwind | ||
| 464 | -define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { | ||
| 465 | +; Function Attrs: convergent noinline nounwind optnone | ||
| 466 | +define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { | ||
| 467 | entry: | ||
| 468 | - %res.addr = alloca i32 addrspace(1)*, align 8 | ||
| 469 | - store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa !10 | ||
| 470 | - | ||
| 471 | + %res.addr = alloca i32 addrspace(1)*, align 4 | ||
| 472 | + store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4 | ||
| 473 | ; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} [[five]] | ||
| 474 | ; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 5) | ||
| 475 | - %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2 | ||
| 476 | - | ||
| 477 | - %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa !10 | ||
| 478 | - store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14 | ||
| 479 | + %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2 | ||
| 480 | + %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4 | ||
| 481 | + store i32 %call, i32 addrspace(1)* %0, align 4 | ||
| 482 | ret void | ||
| 483 | } | ||
| 484 | |||
| 485 | -; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]] | ||
| 486 | +; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]] | ||
| 487 | ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}} | ||
| 488 | ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}} | ||
| 489 | ; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 %{{.*}}) | ||
| 490 | -; Function Attrs: convergent nounwind | ||
| 491 | +; Function Attrs: convergent noinline nounwind optnone | ||
| 492 | define internal spir_func i32 @_block_invoke(i8 addrspace(4)* %.block_descriptor, i32 %i) #1 { | ||
| 493 | entry: | ||
| 494 | - %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8 | ||
| 495 | + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
| 496 | %i.addr = alloca i32, align 4 | ||
| 497 | - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8 | ||
| 498 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
| 499 | - store i32 %i, i32* %i.addr, align 4, !tbaa !14 | ||
| 500 | - %0 = load i32, i32* %i.addr, align 4, !tbaa !14 | ||
| 501 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 | ||
| 502 | + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
| 503 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
| 504 | + store i32 %i, i32* %i.addr, align 4 | ||
| 505 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 | ||
| 506 | + %0 = load i32, i32* %i.addr, align 4 | ||
| 507 | %add = add nsw i32 %0, 1 | ||
| 508 | ret i32 %add | ||
| 509 | } | ||
| 510 | |||
| 511 | -attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 512 | -attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 513 | +attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 514 | +attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 515 | attributes #2 = { convergent } | ||
| 516 | |||
| 517 | !llvm.module.flags = !{!0} | ||
| 518 | -!opencl.enable.FP_CONTRACT = !{} | ||
| 519 | !opencl.ocl.version = !{!1} | ||
| 520 | !opencl.spir.version = !{!1} | ||
| 521 | -!opencl.used.extensions = !{!2} | ||
| 522 | -!opencl.used.optional.core.features = !{!2} | ||
| 523 | -!opencl.compiler.options = !{!2} | ||
| 524 | -!llvm.ident = !{!3} | ||
| 525 | +!llvm.ident = !{!2} | ||
| 526 | |||
| 527 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
| 528 | !1 = !{i32 2, i32 0} | ||
| 529 | -!2 = !{} | ||
| 530 | -!3 = !{!"clang version 7.0.0"} | ||
| 531 | -!4 = !{i32 1} | ||
| 532 | -!5 = !{!"none"} | ||
| 533 | -!6 = !{!"int*"} | ||
| 534 | -!7 = !{!""} | ||
| 535 | -!8 = !{i1 false} | ||
| 536 | -!9 = !{i32 0} | ||
| 537 | -!10 = !{!11, !11, i64 0} | ||
| 538 | -!11 = !{!"any pointer", !12, i64 0} | ||
| 539 | -!12 = !{!"omnipotent char", !13, i64 0} | ||
| 540 | -!13 = !{!"Simple C/C++ TBAA"} | ||
| 541 | -!14 = !{!15, !15, i64 0} | ||
| 542 | -!15 = !{!"int", !12, i64 0} | ||
| 543 | +!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} | ||
| 544 | +!3 = !{i32 1} | ||
| 545 | +!4 = !{!"none"} | ||
| 546 | +!5 = !{!"int*"} | ||
| 547 | +!6 = !{!""} | ||
| 548 | diff --git a/test/literal-struct.ll b/test/literal-struct.ll | ||
| 549 | index c52170a..52a731a 100644 | ||
| 550 | --- a/test/literal-struct.ll | ||
| 551 | +++ b/test/literal-struct.ll | ||
| 552 | @@ -2,7 +2,7 @@ | ||
| 553 | ; structs, i.e. structs whose type has no name. Typicaly clang generate such | ||
| 554 | ; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with | ||
| 555 | ; the following command: | ||
| 556 | -; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll | ||
| 557 | +; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll | ||
| 558 | |||
| 559 | ; literal-struct.cl: | ||
| 560 | ; void foo() | ||
| 561 | @@ -14,25 +14,28 @@ | ||
| 562 | ; RUN: llvm-as < %s | llvm-spirv -spirv-text -o %t | ||
| 563 | ; RUN: FileCheck < %t %s | ||
| 564 | |||
| 565 | -; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0 | ||
| 566 | -; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}} | ||
| 567 | +; CHECK: TypeInt [[Int:[0-9]+]] 32 0 | ||
| 568 | +; CHECK: TypeInt [[Int8:[0-9]+]] 8 0 | ||
| 569 | +; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]] | ||
| 570 | +; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]] | ||
| 571 | |||
| 572 | target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
| 573 | target triple = "spir" | ||
| 574 | |||
| 575 | -@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
| 576 | +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
| 577 | + | ||
| 578 | +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 | ||
| 579 | ; CHECK: ConstantComposite [[StructType]] | ||
| 580 | |||
| 581 | -; This is artificial case is added to cover ConstantNull instrucitions with TypeStruct. | ||
| 582 | -@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } zeroinitializer, align 4 | ||
| 583 | +@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4 | ||
| 584 | ; CHECK: ConstantNull [[StructType]] | ||
| 585 | |||
| 586 | ; Function Attrs: convergent noinline nounwind optnone | ||
| 587 | define spir_func void @foo() #0 { | ||
| 588 | entry: | ||
| 589 | - %myBlock = alloca void () addrspace(4)*, align 4 | ||
| 590 | - store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4 | ||
| 591 | - call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1 | ||
| 592 | + %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4 | ||
| 593 | + store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4 | ||
| 594 | + call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1 | ||
| 595 | ret void | ||
| 596 | } | ||
| 597 | |||
| 598 | @@ -40,14 +43,14 @@ entry: | ||
| 599 | define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) #0 { | ||
| 600 | entry: | ||
| 601 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
| 602 | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 | ||
| 603 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 | ||
| 604 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
| 605 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
| 606 | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 | ||
| 607 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
| 608 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 | ||
| 609 | ret void | ||
| 610 | } | ||
| 611 | |||
| 612 | -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 613 | +attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 614 | attributes #1 = { convergent } | ||
| 615 | |||
| 616 | !llvm.module.flags = !{!0} | ||
| 617 | @@ -57,4 +60,4 @@ attributes #1 = { convergent } | ||
| 618 | |||
| 619 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
| 620 | !1 = !{i32 2, i32 0} | ||
| 621 | -!2 = !{!"clang version 8.0.0 "} | ||
| 622 | +!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} | ||
| 623 | diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll | ||
| 624 | index 76e29f0..df89b13 100644 | ||
| 625 | --- a/test/transcoding/block_w_struct_return.ll | ||
| 626 | +++ b/test/transcoding/block_w_struct_return.ll | ||
| 627 | @@ -16,6 +16,8 @@ | ||
| 628 | ; res[tid] = kernelBlock(aa).a - 6; | ||
| 629 | ; } | ||
| 630 | |||
| 631 | +; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll | ||
| 632 | + | ||
| 633 | ; RUN: llvm-as %s -o %t.bc | ||
| 634 | ; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt | ||
| 635 | ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV | ||
| 636 | @@ -27,12 +29,14 @@ | ||
| 637 | ; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke" | ||
| 638 | |||
| 639 | ; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32 | ||
| 640 | +; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8 | ||
| 641 | +; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]] | ||
| 642 | ; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]] | ||
| 643 | ; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]] | ||
| 644 | |||
| 645 | ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7 | ||
| 646 | ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7 | ||
| 647 | -; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}} | ||
| 648 | +; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}} | ||
| 649 | ; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] [[StructRet]] [[BlockLit]] [[StructArg]] | ||
| 650 | |||
| 651 | ; CHECK-LLVM: %[[StructA:.*]] = type { i32 } | ||
| 652 | @@ -41,20 +45,21 @@ | ||
| 653 | target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
| 654 | target triple = "spir64-unknown-unknown" | ||
| 655 | |||
| 656 | +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
| 657 | %struct.A = type { i32 } | ||
| 658 | |||
| 659 | -@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
| 660 | +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8 | ||
| 661 | |||
| 662 | ; Function Attrs: convergent noinline nounwind optnone | ||
| 663 | -define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible !8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location !7 { | ||
| 664 | +define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { | ||
| 665 | entry: | ||
| 666 | %res.addr = alloca i32 addrspace(1)*, align 8 | ||
| 667 | - %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8 | ||
| 668 | + %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 8 | ||
| 669 | %tid = alloca i64, align 8 | ||
| 670 | %aa = alloca %struct.A, align 4 | ||
| 671 | %tmp = alloca %struct.A, align 4 | ||
| 672 | store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8 | ||
| 673 | - store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void (%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void (%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) addrspace(4)** %kernelBlock, align 8 | ||
| 674 | + store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8 | ||
| 675 | %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4 | ||
| 676 | store i64 %call, i64* %tid, align 8 | ||
| 677 | %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8 | ||
| 678 | @@ -63,7 +68,7 @@ entry: | ||
| 679 | store i32 -1, i32 addrspace(1)* %arrayidx, align 4 | ||
| 680 | %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0 | ||
| 681 | store i32 5, i32* %a, align 4 | ||
| 682 | - call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5 | ||
| 683 | + call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5 | ||
| 684 | %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0 | ||
| 685 | %2 = load i32, i32* %a1, align 4 | ||
| 686 | %sub = sub nsw i32 %2, 6 | ||
| 687 | @@ -78,10 +83,10 @@ entry: | ||
| 688 | define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval align 4 %a) #1 { | ||
| 689 | entry: | ||
| 690 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8 | ||
| 691 | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8 | ||
| 692 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8 | ||
| 693 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8 | ||
| 694 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
| 695 | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 8 | ||
| 696 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
| 697 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8 | ||
| 698 | %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0 | ||
| 699 | store i32 6, i32* %a1, align 4 | ||
| 700 | %0 = bitcast %struct.A* %agg.result to i8* | ||
| 701 | @@ -96,30 +101,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r | ||
| 702 | ; Function Attrs: convergent nounwind readnone | ||
| 703 | declare spir_func i64 @_Z13get_global_idj(i32) #3 | ||
| 704 | |||
| 705 | -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 706 | -attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 707 | +attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 708 | +attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 709 | attributes #2 = { argmemonly nounwind } | ||
| 710 | attributes #3 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 711 | attributes #4 = { convergent nounwind readnone } | ||
| 712 | attributes #5 = { convergent } | ||
| 713 | |||
| 714 | !llvm.module.flags = !{!0} | ||
| 715 | -!opencl.enable.FP_CONTRACT = !{} | ||
| 716 | !opencl.ocl.version = !{!1} | ||
| 717 | !opencl.spir.version = !{!1} | ||
| 718 | -!opencl.used.extensions = !{!2} | ||
| 719 | -!opencl.used.optional.core.features = !{!2} | ||
| 720 | -!opencl.compiler.options = !{!2} | ||
| 721 | -!llvm.ident = !{!3} | ||
| 722 | +!llvm.ident = !{!2} | ||
| 723 | |||
| 724 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
| 725 | !1 = !{i32 2, i32 0} | ||
| 726 | -!2 = !{} | ||
| 727 | -!3 = !{!"clang version 7.0.0"} | ||
| 728 | -!4 = !{i32 1} | ||
| 729 | -!5 = !{!"none"} | ||
| 730 | -!6 = !{!"int*"} | ||
| 731 | -!7 = !{!""} | ||
| 732 | -!8 = !{i1 false} | ||
| 733 | -!9 = !{i32 0} | ||
| 734 | - | ||
| 735 | +!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} | ||
| 736 | +!3 = !{i32 1} | ||
| 737 | +!4 = !{!"none"} | ||
| 738 | +!5 = !{!"int*"} | ||
| 739 | +!6 = !{!""} | ||
| 740 | diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll | ||
| 741 | index 0d29c71..435871d 100644 | ||
| 742 | --- a/test/transcoding/enqueue_kernel.ll | ||
| 743 | +++ b/test/transcoding/enqueue_kernel.ll | ||
| 744 | @@ -51,11 +51,12 @@ | ||
| 745 | ; ModuleID = 'enqueue_kernel.cl' | ||
| 746 | source_filename = "enqueue_kernel.cl" | ||
| 747 | target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
| 748 | -target triple = "spir-unknown-unknown" | ||
| 749 | +target triple = "spir" | ||
| 750 | |||
| 751 | %opencl.queue_t = type opaque | ||
| 752 | %struct.ndrange_t = type { i32 } | ||
| 753 | %opencl.clk_event_t = type opaque | ||
| 754 | +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
| 755 | |||
| 756 | ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel" | ||
| 757 | ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel" | ||
| 758 | @@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown" | ||
| 759 | |||
| 760 | ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32 | ||
| 761 | ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8 | ||
| 762 | -; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8 | ||
| 763 | ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0 | ||
| 764 | -; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17 | ||
| 765 | +; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21 | ||
| 766 | ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2 | ||
| 767 | -; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20 | ||
| 768 | -; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]] | ||
| 769 | +; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8 | ||
| 770 | +; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24 | ||
| 771 | |||
| 772 | ; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}} | ||
| 773 | +; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]] | ||
| 774 | +; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]] | ||
| 775 | ; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]] | ||
| 776 | ; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]] | ||
| 777 | -; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]] | ||
| 778 | ; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]] | ||
| 779 | ; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] | ||
| 780 | ; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] | ||
| 781 | ; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] | ||
| 782 | ; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]] | ||
| 783 | |||
| 784 | -; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32 } | ||
| 785 | -; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }> | ||
| 786 | -; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> | ||
| 787 | -; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32 }> | ||
| 788 | +; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32, i8 addrspace(4)* } | ||
| 789 | +; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> | ||
| 790 | +; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> | ||
| 791 | +; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)* }> | ||
| 792 | |||
| 793 | -; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4 | ||
| 794 | -; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4 | ||
| 795 | +; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 | ||
| 796 | +; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 | ||
| 797 | |||
| 798 | -@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
| 799 | -@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
| 800 | +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4 | ||
| 801 | +@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4 | ||
| 802 | |||
| 803 | ; Function Attrs: convergent noinline nounwind optnone | ||
| 804 | -define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { | ||
| 805 | +define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { | ||
| 806 | entry: | ||
| 807 | + %a.addr = alloca i32 addrspace(1)*, align 4 | ||
| 808 | + %b.addr = alloca i32 addrspace(1)*, align 4 | ||
| 809 | + %i.addr = alloca i32, align 4 | ||
| 810 | + %c0.addr = alloca i8, align 1 | ||
| 811 | %default_queue = alloca %opencl.queue_t*, align 4 | ||
| 812 | %flags = alloca i32, align 4 | ||
| 813 | %ndrange = alloca %struct.ndrange_t, align 4 | ||
| 814 | %clk_event = alloca %opencl.clk_event_t*, align 4 | ||
| 815 | %event_wait_list = alloca %opencl.clk_event_t*, align 4 | ||
| 816 | %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4 | ||
| 817 | - %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4 | ||
| 818 | - %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4 | ||
| 819 | + %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4 | ||
| 820 | + %tmp = alloca %struct.ndrange_t, align 4 | ||
| 821 | + %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4 | ||
| 822 | + %tmp4 = alloca %struct.ndrange_t, align 4 | ||
| 823 | %c = alloca i8, align 1 | ||
| 824 | + %tmp11 = alloca %struct.ndrange_t, align 4 | ||
| 825 | + %block_sizes = alloca [1 x i32], align 4 | ||
| 826 | + %tmp12 = alloca %struct.ndrange_t, align 4 | ||
| 827 | + %block_sizes13 = alloca [3 x i32], align 4 | ||
| 828 | + store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4 | ||
| 829 | + store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4 | ||
| 830 | + store i32 %i, i32* %i.addr, align 4 | ||
| 831 | + store i8 %c0, i8* %c0.addr, align 1 | ||
| 832 | store i32 0, i32* %flags, align 4 | ||
| 833 | %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 | ||
| 834 | %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4 | ||
| 835 | store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4 | ||
| 836 | %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 | ||
| 837 | %2 = load i32, i32* %flags, align 4 | ||
| 838 | - %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0 | ||
| 839 | - store i32 17, i32* %block.size, align 4 | ||
| 840 | - %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1 | ||
| 841 | + %3 = bitcast %struct.ndrange_t* %tmp to i8* | ||
| 842 | + %4 = bitcast %struct.ndrange_t* %ndrange to i8* | ||
| 843 | + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false) | ||
| 844 | + %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0 | ||
| 845 | + store i32 21, i32* %block.size, align 4 | ||
| 846 | + %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1 | ||
| 847 | store i32 4, i32* %block.align, align 4 | ||
| 848 | - %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2 | ||
| 849 | - store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4 | ||
| 850 | - %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3 | ||
| 851 | - store i32 %i, i32* %block.captured1, align 4 | ||
| 852 | - %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4 | ||
| 853 | - store i8 %c0, i8* %block.captured2, align 4 | ||
| 854 | - %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()* | ||
| 855 | - %4 = addrspacecast void ()* %3 to i8 addrspace(4)* | ||
| 856 | + %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2 | ||
| 857 | + store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4 | ||
| 858 | + %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3 | ||
| 859 | + %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4 | ||
| 860 | + store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4 | ||
| 861 | + %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4 | ||
| 862 | + %6 = load i32, i32* %i.addr, align 4 | ||
| 863 | + store i32 %6, i32* %block.captured1, align 4 | ||
| 864 | + %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5 | ||
| 865 | + %7 = load i8, i8* %c0.addr, align 1 | ||
| 866 | + store i8 %7, i8* %block.captured2, align 4 | ||
| 867 | + %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic* | ||
| 868 | + %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)* | ||
| 869 | |||
| 870 | ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]] | ||
| 871 | ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} | ||
| 872 | ; [[ConstInt0]] [[EventNull]] [[EventNull]] | ||
| 873 | ; [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]] | ||
| 874 | |||
| 875 | -; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 addrspace(4)* | ||
| 876 | +; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic* | ||
| 877 | +; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)* | ||
| 878 | ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)* | ||
| 879 | -; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]]) | ||
| 880 | - | ||
| 881 | - %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4) | ||
| 882 | - %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)* | ||
| 883 | - %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* | ||
| 884 | - %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0 | ||
| 885 | - store i32 20, i32* %block.size5, align 4 | ||
| 886 | - %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1 | ||
| 887 | +; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]]) | ||
| 888 | + | ||
| 889 | + %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9) | ||
| 890 | + %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 | ||
| 891 | + %12 = load i32, i32* %flags, align 4 | ||
| 892 | + %13 = bitcast %struct.ndrange_t* %tmp4 to i8* | ||
| 893 | + %14 = bitcast %struct.ndrange_t* %ndrange to i8* | ||
| 894 | + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false) | ||
| 895 | + %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)* | ||
| 896 | + %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* | ||
| 897 | + %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0 | ||
| 898 | + store i32 24, i32* %block.size5, align 4 | ||
| 899 | + %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1 | ||
| 900 | store i32 4, i32* %block.align6, align 4 | ||
| 901 | - %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2 | ||
| 902 | - store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4 | ||
| 903 | - %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3 | ||
| 904 | - store i32 %i, i32* %block.captured8, align 4 | ||
| 905 | - %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4 | ||
| 906 | - store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4 | ||
| 907 | - %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to void ()* | ||
| 908 | - %9 = addrspacecast void ()* %8 to i8 addrspace(4)* | ||
| 909 | + %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2 | ||
| 910 | + store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4 | ||
| 911 | + %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3 | ||
| 912 | + %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4 | ||
| 913 | + store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4 | ||
| 914 | + %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4 | ||
| 915 | + %18 = load i32, i32* %i.addr, align 4 | ||
| 916 | + store i32 %18, i32* %block.captured9, align 4 | ||
| 917 | + %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5 | ||
| 918 | + %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4 | ||
| 919 | + store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4 | ||
| 920 | + %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic* | ||
| 921 | + %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)* | ||
| 922 | + | ||
| 923 | |||
| 924 | ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]] | ||
| 925 | ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]] | ||
| 926 | @@ -158,16 +193,24 @@ entry: | ||
| 927 | ; [[ConstInt2]] [[Event1]] [[Event2]] | ||
| 928 | ; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]] | ||
| 929 | |||
| 930 | -; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 addrspace(4)* | ||
| 931 | +; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to %struct.__opencl_block_literal_generic* | ||
| 932 | +; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4) | ||
| 933 | ; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)* | ||
| 934 | -; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]]) | ||
| 935 | - | ||
| 936 | - %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9) | ||
| 937 | - %11 = alloca [1 x i32] | ||
| 938 | - %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0 | ||
| 939 | - %13 = load i8, i8* %c, align 1 | ||
| 940 | - %14 = zext i8 %13 to i32 | ||
| 941 | - store i32 %14, i32* %12, align 4 | ||
| 942 | +; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]]) | ||
| 943 | + | ||
| 944 | + %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21) | ||
| 945 | + %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 | ||
| 946 | + %24 = load i32, i32* %flags, align 4 | ||
| 947 | + %25 = bitcast %struct.ndrange_t* %tmp11 to i8* | ||
| 948 | + %26 = bitcast %struct.ndrange_t* %ndrange to i8* | ||
| 949 | + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false) | ||
| 950 | + %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 | ||
| 951 | + %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)* | ||
| 952 | + %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* | ||
| 953 | + %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0 | ||
| 954 | + %30 = load i8, i8* %c, align 1 | ||
| 955 | + %31 = zext i8 %30 to i32 | ||
| 956 | + store i32 %31, i32* %29, align 4 | ||
| 957 | |||
| 958 | ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]] | ||
| 959 | ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]] | ||
| 960 | @@ -182,14 +225,18 @@ entry: | ||
| 961 | ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)* | ||
| 962 | ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}}) | ||
| 963 | |||
| 964 | - %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %12) | ||
| 965 | - %16 = alloca [3 x i32] | ||
| 966 | - %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0 | ||
| 967 | - store i32 1, i32* %17, align 4 | ||
| 968 | - %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1 | ||
| 969 | - store i32 2, i32* %18, align 4 | ||
| 970 | - %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2 | ||
| 971 | - store i32 4, i32* %19, align 4 | ||
| 972 | + %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29) | ||
| 973 | + %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 | ||
| 974 | + %34 = load i32, i32* %flags, align 4 | ||
| 975 | + %35 = bitcast %struct.ndrange_t* %tmp12 to i8* | ||
| 976 | + %36 = bitcast %struct.ndrange_t* %ndrange to i8* | ||
| 977 | + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false) | ||
| 978 | + %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0 | ||
| 979 | + store i32 1, i32* %37, align 4 | ||
| 980 | + %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1 | ||
| 981 | + store i32 2, i32* %38, align 4 | ||
| 982 | + %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2 | ||
| 983 | + store i32 4, i32* %39, align 4 | ||
| 984 | |||
| 985 | ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]] | ||
| 986 | ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]] | ||
| 987 | @@ -206,24 +253,27 @@ entry: | ||
| 988 | ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)* | ||
| 989 | ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}}) | ||
| 990 | |||
| 991 | - %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17) | ||
| 992 | + %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37) | ||
| 993 | ret void | ||
| 994 | } | ||
| 995 | |||
| 996 | +; Function Attrs: argmemonly nounwind | ||
| 997 | +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1 | ||
| 998 | + | ||
| 999 | ; Function Attrs: convergent noinline nounwind optnone | ||
| 1000 | define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #2 { | ||
| 1001 | entry: | ||
| 1002 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
| 1003 | - %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4 | ||
| 1004 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4 | ||
| 1005 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
| 1006 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* | ||
| 1007 | - store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4 | ||
| 1008 | - %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4 | ||
| 1009 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* | ||
| 1010 | + store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4 | ||
| 1011 | + %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5 | ||
| 1012 | %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4 | ||
| 1013 | %conv = sext i8 %0 to i32 | ||
| 1014 | - %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 2 | ||
| 1015 | + %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3 | ||
| 1016 | %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4 | ||
| 1017 | - %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3 | ||
| 1018 | + %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4 | ||
| 1019 | %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4 | ||
| 1020 | %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2 | ||
| 1021 | store i32 %conv, i32 addrspace(1)* %arrayidx, align 4 | ||
| 1022 | @@ -243,19 +293,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i | ||
| 1023 | define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 { | ||
| 1024 | entry: | ||
| 1025 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
| 1026 | - %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4 | ||
| 1027 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4 | ||
| 1028 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
| 1029 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* | ||
| 1030 | - store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4 | ||
| 1031 | - %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 | ||
| 1032 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* | ||
| 1033 | + store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4 | ||
| 1034 | + %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5 | ||
| 1035 | %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4 | ||
| 1036 | - %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 | ||
| 1037 | + %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 | ||
| 1038 | %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4 | ||
| 1039 | %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1 | ||
| 1040 | %2 = load i32, i32 addrspace(1)* %arrayidx, align 4 | ||
| 1041 | - %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2 | ||
| 1042 | + %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 | ||
| 1043 | %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4 | ||
| 1044 | - %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 | ||
| 1045 | + %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 | ||
| 1046 | %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4 | ||
| 1047 | %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4 | ||
| 1048 | store i32 %2, i32 addrspace(1)* %arrayidx4, align 4 | ||
| 1049 | @@ -276,11 +326,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac | ||
| 1050 | entry: | ||
| 1051 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
| 1052 | %p.addr = alloca i8 addrspace(3)*, align 4 | ||
| 1053 | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 | ||
| 1054 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 | ||
| 1055 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
| 1056 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
| 1057 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
| 1058 | store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4 | ||
| 1059 | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 | ||
| 1060 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 | ||
| 1061 | ret void | ||
| 1062 | } | ||
| 1063 | |||
| 1064 | @@ -300,13 +350,13 @@ entry: | ||
| 1065 | %p1.addr = alloca i8 addrspace(3)*, align 4 | ||
| 1066 | %p2.addr = alloca i8 addrspace(3)*, align 4 | ||
| 1067 | %p3.addr = alloca i8 addrspace(3)*, align 4 | ||
| 1068 | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 | ||
| 1069 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 | ||
| 1070 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
| 1071 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
| 1072 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
| 1073 | store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4 | ||
| 1074 | store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4 | ||
| 1075 | store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4 | ||
| 1076 | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 | ||
| 1077 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 | ||
| 1078 | ret void | ||
| 1079 | } | ||
| 1080 | |||
| 1081 | @@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*, | ||
| 1082 | ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*) | ||
| 1083 | ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*) | ||
| 1084 | |||
| 1085 | -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 1086 | +attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 1087 | attributes #1 = { argmemonly nounwind } | ||
| 1088 | -attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 1089 | +attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 1090 | attributes #3 = { nounwind } | ||
| 1091 | |||
| 1092 | !llvm.module.flags = !{!0} | ||
| 1093 | -!opencl.enable.FP_CONTRACT = !{} | ||
| 1094 | !opencl.ocl.version = !{!1} | ||
| 1095 | !opencl.spir.version = !{!1} | ||
| 1096 | -!opencl.used.extensions = !{!2} | ||
| 1097 | -!opencl.used.optional.core.features = !{!2} | ||
| 1098 | -!opencl.compiler.options = !{!2} | ||
| 1099 | -!llvm.ident = !{!3} | ||
| 1100 | +!llvm.ident = !{!2} | ||
| 1101 | |||
| 1102 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
| 1103 | !1 = !{i32 2, i32 0} | ||
| 1104 | -!2 = !{} | ||
| 1105 | -!3 = !{!"clang version 7.0.0"} | ||
| 1106 | -!4 = !{i32 1, i32 1, i32 0, i32 0} | ||
| 1107 | -!5 = !{!"none", !"none", !"none", !"none"} | ||
| 1108 | -!6 = !{!"int*", !"int*", !"int", !"char"} | ||
| 1109 | -!7 = !{!"", !"", !"", !""} | ||
| 1110 | -!8 = !{i1 false, i1 false, i1 false, i1 false} | ||
| 1111 | -!9 = !{i32 0, i32 0, i32 0, i32 0} | ||
| 1112 | +!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} | ||
| 1113 | +!3 = !{i32 1, i32 1, i32 0, i32 0} | ||
| 1114 | +!4 = !{!"none", !"none", !"none", !"none"} | ||
| 1115 | +!5 = !{!"int*", !"int*", !"int", !"char"} | ||
| 1116 | +!6 = !{!"", !"", !"", !""} | ||
| 1117 | -- | ||
| 1118 | 1.8.3.1 | ||
| 1119 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch new file mode 100644 index 00000000..9d25bbad --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | From 7bbd0058362ac3bb5edd7a82d43e1785810776b3 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Anuj Mittal <anuj.mittal@intel.com> | ||
| 3 | Date: Fri, 29 Mar 2019 08:56:53 +0800 | ||
| 4 | Subject: [PATCH] dont export targets for binaries | ||
| 5 | |||
| 6 | Exporting targets for binaries makes projects that use the LLVM cmake | ||
| 7 | modules look for those binaries in the sysroot, which isn't desirable in | ||
| 8 | this case and isn't needed either. | ||
| 9 | |||
| 10 | Upstream-Status: Inappropriate [cross-compile specific] | ||
| 11 | |||
| 12 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
| 13 | --- | ||
| 14 | llvm/cmake/modules/AddLLVM.cmake | 9 --------- | ||
| 15 | llvm/cmake/modules/TableGen.cmake | 6 ------ | ||
| 16 | 2 files changed, 15 deletions(-) | ||
| 17 | |||
| 18 | diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake | ||
| 19 | index 0df6845..b79f4fa 100644 | ||
| 20 | --- a/llvm/cmake/modules/AddLLVM.cmake | ||
| 21 | +++ b/llvm/cmake/modules/AddLLVM.cmake | ||
| 22 | @@ -866,12 +866,6 @@ macro(add_llvm_tool name) | ||
| 23 | |||
| 24 | if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY) | ||
| 25 | if( LLVM_BUILD_TOOLS ) | ||
| 26 | - if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR | ||
| 27 | - NOT LLVM_DISTRIBUTION_COMPONENTS) | ||
| 28 | - set(export_to_llvmexports EXPORT LLVMExports) | ||
| 29 | - set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True) | ||
| 30 | - endif() | ||
| 31 | - | ||
| 32 | install(TARGETS ${name} | ||
| 33 | ${export_to_llvmexports} | ||
| 34 | RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR} | ||
| 35 | @@ -884,9 +878,6 @@ macro(add_llvm_tool name) | ||
| 36 | endif() | ||
| 37 | endif() | ||
| 38 | endif() | ||
| 39 | - if( LLVM_BUILD_TOOLS ) | ||
| 40 | - set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) | ||
| 41 | - endif() | ||
| 42 | set_target_properties(${name} PROPERTIES FOLDER "Tools") | ||
| 43 | endmacro(add_llvm_tool name) | ||
| 44 | |||
| 45 | diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake | ||
| 46 | index 3c84ae7..141219f 100644 | ||
| 47 | --- a/llvm/cmake/modules/TableGen.cmake | ||
| 48 | +++ b/llvm/cmake/modules/TableGen.cmake | ||
| 49 | @@ -164,14 +164,8 @@ macro(add_tablegen target project) | ||
| 50 | endif() | ||
| 51 | |||
| 52 | if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY) | ||
| 53 | - if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR | ||
| 54 | - NOT LLVM_DISTRIBUTION_COMPONENTS) | ||
| 55 | - set(export_to_llvmexports EXPORT LLVMExports) | ||
| 56 | - endif() | ||
| 57 | - | ||
| 58 | install(TARGETS ${target} | ||
| 59 | ${export_to_llvmexports} | ||
| 60 | RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR}) | ||
| 61 | endif() | ||
| 62 | - set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${target}) | ||
| 63 | endmacro() | ||
| 64 | -- | ||
| 65 | 2.7.4 | ||
| 66 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch new file mode 100644 index 00000000..0dfc537b --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch | |||
| @@ -0,0 +1,59 @@ | |||
| 1 | From 6c33fb58869ffb17106047c45ab8d3856966eaf7 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Anuj Mittal <anuj.mittal@intel.com> | ||
| 3 | Date: Tue, 26 Mar 2019 14:11:29 +0800 | ||
| 4 | Subject: [PATCH] point to correct clang project and tblgen | ||
| 5 | |||
| 6 | Point to the correct path for the clang project as per the way we unpack. | ||
| 7 | Also let the llvm-tblgen path be passed from the recipe itself. | ||
| 8 | |||
| 9 | Also, since we're going to do the patching ourselves, there is no need to | ||
| 10 | look for git through cmake. | ||
| 11 | |||
| 12 | Upstream-Status: Inappropriate [OE specific] | ||
| 13 | --- | ||
| 14 | CMakeLists.txt | 8 ++++---- | ||
| 15 | 1 file changed, 4 insertions(+), 4 deletions(-) | ||
| 16 | |||
| 17 | diff --git a/CMakeLists.txt b/CMakeLists.txt | ||
| 18 | index 174133b..c769f08 100644 | ||
| 19 | --- a/CMakeLists.txt | ||
| 20 | +++ b/CMakeLists.txt | ||
| 21 | @@ -53,7 +53,7 @@ endif(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) | ||
| 22 | include(AddLLVM) | ||
| 23 | include(TableGen) | ||
| 24 | |||
| 25 | -find_package(Git REQUIRED) | ||
| 26 | +#find_package(Git REQUIRED) | ||
| 27 | |||
| 28 | if (NOT WIN32) | ||
| 29 | add_subdirectory( linux_linker ) | ||
| 30 | @@ -80,7 +80,7 @@ set(TARGET_NAME ${COMMON_CLANG_LIBRARY_NAME}${BUILD_PLATFORM} ) | ||
| 31 | |||
| 32 | if(NOT USE_PREBUILT_LLVM) | ||
| 33 | set(TARGET_BRANCH "ocl-open-80") | ||
| 34 | - set(CLANG_SOURCE_DIR ${LLVM_SOURCE_DIR}/tools/clang) | ||
| 35 | + set(CLANG_SOURCE_DIR ${LLVM_SOURCE_DIR}/../clang) | ||
| 36 | set(CLANG_BASE_REVISION a03da8be08a208122e292016cb6cea1f30229677) | ||
| 37 | |||
| 38 | set(SPIRV_SOURCE_DIR ${LLVM_SOURCE_DIR}/projects/llvm-spirv) | ||
| 39 | @@ -102,7 +102,7 @@ endif(NOT USE_PREBUILT_LLVM) | ||
| 40 | set (COMPILE_OPTIONS_TD opencl_clang_options.td) | ||
| 41 | set (COMPILE_OPTIONS_INC opencl_clang_options.inc) | ||
| 42 | |||
| 43 | -set(LLVM_TABLEGEN_EXE "llvm-tblgen") | ||
| 44 | +#set(LLVM_TABLEGEN_EXE "llvm-tblgen") | ||
| 45 | set(LLVM_TARGET_DEFINITIONS ${COMPILE_OPTIONS_TD}) | ||
| 46 | if(USE_PREBUILT_LLVM) | ||
| 47 | set(TABLEGEN_ADDITIONAL -I ${LLVM_INCLUDE_DIRS}) | ||
| 48 | @@ -153,7 +153,7 @@ endif() | ||
| 49 | |||
| 50 | if(NOT USE_PREBUILT_LLVM) | ||
| 51 | set(CLANG_BINARY_DIR ${LLVM_BINARY_DIR}/tools/clang/) | ||
| 52 | - set(CLANG_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}/tools/clang/) | ||
| 53 | + set(CLANG_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}/../clang/) | ||
| 54 | include_directories( | ||
| 55 | ${CLANG_BINARY_DIR}/include # for tablegened includes | ||
| 56 | ${CLANG_SOURCE_DIR}/include # for basic headers | ||
| 57 | -- | ||
| 58 | 2.19.1 | ||
| 59 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch new file mode 100644 index 00000000..2e935a13 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch | |||
| @@ -0,0 +1,294 @@ | |||
| 1 | From c94ec28600255098ffb9d73d1b386a7c8a535590 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Andrew Savonichev <andrew.savonichev@intel.com> | ||
| 3 | Date: Thu, 21 Feb 2019 11:02:10 +0000 | ||
| 4 | Subject: [PATCH 2/2] [OpenCL] Simplify LLVM IR generated for OpenCL blocks | ||
| 5 | |||
| 6 | Summary: | ||
| 7 | Emit direct call of block invoke functions when possible, i.e. in case the | ||
| 8 | block is not passed as a function argument. | ||
| 9 | Also doing some refactoring of `CodeGenFunction::EmitBlockCallExpr()` | ||
| 10 | |||
| 11 | Reviewers: Anastasia, yaxunl, svenvh | ||
| 12 | |||
| 13 | Reviewed By: Anastasia | ||
| 14 | |||
| 15 | Subscribers: cfe-commits | ||
| 16 | |||
| 17 | Tags: #clang | ||
| 18 | |||
| 19 | Differential Revision: https://reviews.llvm.org/D58388 | ||
| 20 | |||
| 21 | git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354568 91177308-0d34-0410-b5e6-96231b3b80d8 | ||
| 22 | |||
| 23 | Upstream-Status: Backport | ||
| 24 | [https://github.com/llvm-mirror/clang/commit/eae71f8d05ce550c4e2595c9b7082cc2c7882c58] | ||
| 25 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
| 26 | --- | ||
| 27 | lib/CodeGen/CGBlocks.cpp | 77 +++++++++++++------------- | ||
| 28 | lib/CodeGen/CGOpenCLRuntime.cpp | 30 +++++++--- | ||
| 29 | lib/CodeGen/CGOpenCLRuntime.h | 4 ++ | ||
| 30 | test/CodeGenOpenCL/blocks.cl | 10 +--- | ||
| 31 | test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 34 +++++++++--- | ||
| 32 | 5 files changed, 91 insertions(+), 64 deletions(-) | ||
| 33 | |||
| 34 | diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp | ||
| 35 | index fa3c3ee..10a0238 100644 | ||
| 36 | --- a/lib/CodeGen/CGBlocks.cpp | ||
| 37 | +++ b/lib/CodeGen/CGBlocks.cpp | ||
| 38 | @@ -1261,52 +1261,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, | ||
| 39 | ReturnValueSlot ReturnValue) { | ||
| 40 | const BlockPointerType *BPT = | ||
| 41 | E->getCallee()->getType()->getAs<BlockPointerType>(); | ||
| 42 | - | ||
| 43 | llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); | ||
| 44 | - | ||
| 45 | - // Get a pointer to the generic block literal. | ||
| 46 | - // For OpenCL we generate generic AS void ptr to be able to reuse the same | ||
| 47 | - // block definition for blocks with captures generated as private AS local | ||
| 48 | - // variables and without captures generated as global AS program scope | ||
| 49 | - // variables. | ||
| 50 | - unsigned AddrSpace = 0; | ||
| 51 | - if (getLangOpts().OpenCL) | ||
| 52 | - AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); | ||
| 53 | - | ||
| 54 | - llvm::Type *BlockLiteralTy = | ||
| 55 | - llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); | ||
| 56 | - | ||
| 57 | - // Bitcast the callee to a block literal. | ||
| 58 | - BlockPtr = | ||
| 59 | - Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); | ||
| 60 | - | ||
| 61 | - // Get the function pointer from the literal. | ||
| 62 | - llvm::Value *FuncPtr = | ||
| 63 | - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, | ||
| 64 | - CGM.getLangOpts().OpenCL ? 2 : 3); | ||
| 65 | - | ||
| 66 | - // Add the block literal. | ||
| 67 | + llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType(); | ||
| 68 | + llvm::Value *Func = nullptr; | ||
| 69 | + QualType FnType = BPT->getPointeeType(); | ||
| 70 | + ASTContext &Ctx = getContext(); | ||
| 71 | CallArgList Args; | ||
| 72 | |||
| 73 | - QualType VoidPtrQualTy = getContext().VoidPtrTy; | ||
| 74 | - llvm::Type *GenericVoidPtrTy = VoidPtrTy; | ||
| 75 | if (getLangOpts().OpenCL) { | ||
| 76 | - GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType(); | ||
| 77 | - VoidPtrQualTy = | ||
| 78 | - getContext().getPointerType(getContext().getAddrSpaceQualType( | ||
| 79 | - getContext().VoidTy, LangAS::opencl_generic)); | ||
| 80 | - } | ||
| 81 | - | ||
| 82 | - BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy); | ||
| 83 | - Args.add(RValue::get(BlockPtr), VoidPtrQualTy); | ||
| 84 | - | ||
| 85 | - QualType FnType = BPT->getPointeeType(); | ||
| 86 | + // For OpenCL, BlockPtr is already casted to generic block literal. | ||
| 87 | + | ||
| 88 | + // First argument of a block call is a generic block literal casted to | ||
| 89 | + // generic void pointer, i.e. i8 addrspace(4)* | ||
| 90 | + llvm::Value *BlockDescriptor = Builder.CreatePointerCast( | ||
| 91 | + BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType()); | ||
| 92 | + QualType VoidPtrQualTy = Ctx.getPointerType( | ||
| 93 | + Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic)); | ||
| 94 | + Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy); | ||
| 95 | + // And the rest of the arguments. | ||
| 96 | + EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); | ||
| 97 | + | ||
| 98 | + // We *can* call the block directly unless it is a function argument. | ||
| 99 | + if (!isa<ParmVarDecl>(E->getCalleeDecl())) | ||
| 100 | + Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); | ||
| 101 | + else { | ||
| 102 | + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2); | ||
| 103 | + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); | ||
| 104 | + } | ||
| 105 | + } else { | ||
| 106 | + // Bitcast the block literal to a generic block literal. | ||
| 107 | + BlockPtr = Builder.CreatePointerCast( | ||
| 108 | + BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal"); | ||
| 109 | + // Get pointer to the block invoke function | ||
| 110 | + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3); | ||
| 111 | |||
| 112 | - // And the rest of the arguments. | ||
| 113 | - EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); | ||
| 114 | + // First argument is a block literal casted to a void pointer | ||
| 115 | + BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy); | ||
| 116 | + Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy); | ||
| 117 | + // And the rest of the arguments. | ||
| 118 | + EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); | ||
| 119 | |||
| 120 | - // Load the function. | ||
| 121 | - llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); | ||
| 122 | + // Load the function. | ||
| 123 | + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); | ||
| 124 | + } | ||
| 125 | |||
| 126 | const FunctionType *FuncTy = FnType->castAs<FunctionType>(); | ||
| 127 | const CGFunctionInfo &FnInfo = | ||
| 128 | diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp | ||
| 129 | index 7f6f595..75003e5 100644 | ||
| 130 | --- a/lib/CodeGen/CGOpenCLRuntime.cpp | ||
| 131 | +++ b/lib/CodeGen/CGOpenCLRuntime.cpp | ||
| 132 | @@ -123,6 +123,23 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() { | ||
| 133 | CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); | ||
| 134 | } | ||
| 135 | |||
| 136 | +// Get the block literal from an expression derived from the block expression. | ||
| 137 | +// OpenCL v2.0 s6.12.5: | ||
| 138 | +// Block variable declarations are implicitly qualified with const. Therefore | ||
| 139 | +// all block variables must be initialized at declaration time and may not be | ||
| 140 | +// reassigned. | ||
| 141 | +static const BlockExpr *getBlockExpr(const Expr *E) { | ||
| 142 | + const Expr *Prev = nullptr; // to make sure we do not stuck in infinite loop. | ||
| 143 | + while(!isa<BlockExpr>(E) && E != Prev) { | ||
| 144 | + Prev = E; | ||
| 145 | + E = E->IgnoreCasts(); | ||
| 146 | + if (auto DR = dyn_cast<DeclRefExpr>(E)) { | ||
| 147 | + E = cast<VarDecl>(DR->getDecl())->getInit(); | ||
| 148 | + } | ||
| 149 | + } | ||
| 150 | + return cast<BlockExpr>(E); | ||
| 151 | +} | ||
| 152 | + | ||
| 153 | /// Record emitted llvm invoke function and llvm block literal for the | ||
| 154 | /// corresponding block expression. | ||
| 155 | void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, | ||
| 156 | @@ -137,20 +154,17 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, | ||
| 157 | EnqueuedBlockMap[E].Kernel = nullptr; | ||
| 158 | } | ||
| 159 | |||
| 160 | +llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) { | ||
| 161 | + return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc; | ||
| 162 | +} | ||
| 163 | + | ||
| 164 | CGOpenCLRuntime::EnqueuedBlockInfo | ||
| 165 | CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { | ||
| 166 | CGF.EmitScalarExpr(E); | ||
| 167 | |||
| 168 | // The block literal may be assigned to a const variable. Chasing down | ||
| 169 | // to get the block literal. | ||
| 170 | - if (auto DR = dyn_cast<DeclRefExpr>(E)) { | ||
| 171 | - E = cast<VarDecl>(DR->getDecl())->getInit(); | ||
| 172 | - } | ||
| 173 | - E = E->IgnoreImplicit(); | ||
| 174 | - if (auto Cast = dyn_cast<CastExpr>(E)) { | ||
| 175 | - E = Cast->getSubExpr(); | ||
| 176 | - } | ||
| 177 | - auto *Block = cast<BlockExpr>(E); | ||
| 178 | + const BlockExpr *Block = getBlockExpr(E); | ||
| 179 | |||
| 180 | assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && | ||
| 181 | "Block expression not emitted"); | ||
| 182 | diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h | ||
| 183 | index 750721f..4effc7e 100644 | ||
| 184 | --- a/lib/CodeGen/CGOpenCLRuntime.h | ||
| 185 | +++ b/lib/CodeGen/CGOpenCLRuntime.h | ||
| 186 | @@ -92,6 +92,10 @@ public: | ||
| 187 | /// \param Block block literal emitted for the block expression. | ||
| 188 | void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, | ||
| 189 | llvm::Value *Block); | ||
| 190 | + | ||
| 191 | + /// \return LLVM block invoke function emitted for an expression derived from | ||
| 192 | + /// the block expression. | ||
| 193 | + llvm::Function *getInvokeFunction(const Expr *E); | ||
| 194 | }; | ||
| 195 | |||
| 196 | } | ||
| 197 | diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl | ||
| 198 | index 19aacc3..ab5a2c6 100644 | ||
| 199 | --- a/test/CodeGenOpenCL/blocks.cl | ||
| 200 | +++ b/test/CodeGenOpenCL/blocks.cl | ||
| 201 | @@ -39,11 +39,8 @@ void foo(){ | ||
| 202 | // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* | ||
| 203 | // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]], | ||
| 204 | // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]] | ||
| 205 | - // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 | ||
| 206 | // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* | ||
| 207 | - // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]] | ||
| 208 | - // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)* | ||
| 209 | - // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]]) | ||
| 210 | + // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]]) | ||
| 211 | // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2 | ||
| 212 | // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]] | ||
| 213 | // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3 | ||
| 214 | @@ -53,11 +50,8 @@ void foo(){ | ||
| 215 | // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic* | ||
| 216 | // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]], | ||
| 217 | // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]] | ||
| 218 | - // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2 | ||
| 219 | // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8* | ||
| 220 | - // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]] | ||
| 221 | - // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)* | ||
| 222 | - // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]]) | ||
| 223 | + // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]]) | ||
| 224 | |||
| 225 | int (^ block_B)(void) = ^{ | ||
| 226 | return i; | ||
| 227 | diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
| 228 | index 8445016..1566912 100644 | ||
| 229 | --- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
| 230 | +++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
| 231 | @@ -312,9 +312,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 232 | }; | ||
| 233 | |||
| 234 | // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. | ||
| 235 | - // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) | ||
| 236 | - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* | ||
| 237 | - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
| 238 | + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
| 239 | block_A(); | ||
| 240 | |||
| 241 | // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]]. | ||
| 242 | @@ -333,15 +331,35 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 243 | unsigned size = get_kernel_work_group_size(block_A); | ||
| 244 | |||
| 245 | // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted. | ||
| 246 | - // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) | ||
| 247 | - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* | ||
| 248 | - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
| 249 | + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
| 250 | block_A(); | ||
| 251 | |||
| 252 | + // Make sure that block invoke function is resolved correctly after a sequence of assignments. | ||
| 253 | + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* | ||
| 254 | + // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* | ||
| 255 | + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) | ||
| 256 | + // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*), | ||
| 257 | + // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1, | ||
| 258 | + bl_t b1 = block_G; | ||
| 259 | + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* | ||
| 260 | + // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* | ||
| 261 | + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) | ||
| 262 | + // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*), | ||
| 263 | + // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2, | ||
| 264 | + bl_t b2 = b1; | ||
| 265 | + // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* | ||
| 266 | + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) | ||
| 267 | + // COMMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null) | ||
| 268 | + b2(0); | ||
| 269 | + // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]]. | ||
| 270 | + // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl( | ||
| 271 | + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), | ||
| 272 | + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
| 273 | + size = get_kernel_preferred_work_group_size_multiple(b2); | ||
| 274 | + | ||
| 275 | void (^block_C)(void) = ^{ | ||
| 276 | callee(i, a); | ||
| 277 | }; | ||
| 278 | - | ||
| 279 | // Emits block literal on stack and block kernel [[INVLK3]]. | ||
| 280 | // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke | ||
| 281 | // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue | ||
| 282 | @@ -404,8 +422,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 283 | // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}}) | ||
| 284 | // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}}) | ||
| 285 | // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}}) | ||
| 286 | +// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) | ||
| 287 | // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}}) | ||
| 288 | // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) | ||
| 289 | -// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) | ||
| 290 | // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}}) | ||
| 291 | // COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}}) | ||
| 292 | -- | ||
| 293 | 1.8.3.1 | ||
| 294 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch new file mode 100644 index 00000000..510c7c6e --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch | |||
| @@ -0,0 +1,61 @@ | |||
| 1 | From 29e2813a2ab7d5569860bb07892dfef7b5374d96 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Yaxun Liu <Yaxun.Liu@amd.com> | ||
| 3 | Date: Tue, 26 Feb 2019 16:20:41 +0000 | ||
| 4 | Subject: [PATCH] [OpenCL] Fix assertion due to blocks | ||
| 5 | |||
| 6 | A recent change caused assertion in CodeGenFunction::EmitBlockCallExpr when a block is called. | ||
| 7 | |||
| 8 | There is code | ||
| 9 | |||
| 10 | Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); | ||
| 11 | getCalleeDecl calls Expr::getReferencedDeclOfCallee, which does not handle | ||
| 12 | BlockExpr and returns nullptr, which causes isa to assert. | ||
| 13 | |||
| 14 | This patch fixes that. | ||
| 15 | |||
| 16 | Differential Revision: https://reviews.llvm.org/D58658 | ||
| 17 | |||
| 18 | |||
| 19 | git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354893 91177308-0d34-0410-b5e6-96231b3b80d8 | ||
| 20 | |||
| 21 | Upstream-Status: Backport | ||
| 22 | [https://github.com/llvm-mirror/clang/commit/29e2813a2ab7d5569860bb07892dfef7b5374d96] | ||
| 23 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
| 24 | --- | ||
| 25 | lib/AST/Expr.cpp | 2 ++ | ||
| 26 | test/CodeGenOpenCL/blocks.cl | 6 ++++++ | ||
| 27 | 2 files changed, 8 insertions(+) | ||
| 28 | |||
| 29 | diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp | ||
| 30 | index aef1eab..85690c7 100644 | ||
| 31 | --- a/lib/AST/Expr.cpp | ||
| 32 | +++ b/lib/AST/Expr.cpp | ||
| 33 | @@ -1358,6 +1358,8 @@ Decl *Expr::getReferencedDeclOfCallee() { | ||
| 34 | return DRE->getDecl(); | ||
| 35 | if (MemberExpr *ME = dyn_cast<MemberExpr>(CEE)) | ||
| 36 | return ME->getMemberDecl(); | ||
| 37 | + if (auto *BE = dyn_cast<BlockExpr>(CEE)) | ||
| 38 | + return BE->getBlockDecl(); | ||
| 39 | |||
| 40 | return nullptr; | ||
| 41 | } | ||
| 42 | diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl | ||
| 43 | index ab5a2c6..c3e2685 100644 | ||
| 44 | --- a/test/CodeGenOpenCL/blocks.cl | ||
| 45 | +++ b/test/CodeGenOpenCL/blocks.cl | ||
| 46 | @@ -90,6 +90,12 @@ int get42() { | ||
| 47 | return blockArgFunc(^{return 42;}); | ||
| 48 | } | ||
| 49 | |||
| 50 | +// COMMON-LABEL: define {{.*}}@call_block | ||
| 51 | +// COMMON: call {{.*}}@__call_block_block_invoke | ||
| 52 | +int call_block() { | ||
| 53 | + return ^int(int num) { return num; } (11); | ||
| 54 | +} | ||
| 55 | + | ||
| 56 | // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size" | ||
| 57 | // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align" | ||
| 58 | |||
| 59 | -- | ||
| 60 | 1.8.3.1 | ||
| 61 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend new file mode 100644 index 00000000..f536f0f2 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:" | ||
| 2 | |||
| 3 | SRC_URI_append_intel-x86-common = " \ | ||
| 4 | git://github.com/intel/opencl-clang.git;protocol=https;branch=ocl-open-80;destsuffix=git/llvm/projects/opencl-clang;name=opencl-clang \ | ||
| 5 | git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_80;destsuffix=git/llvm/projects/llvm-spirv;name=spirv \ | ||
| 6 | file://0001-point-to-correct-clang.patch;patchdir=llvm/projects/opencl-clang \ | ||
| 7 | file://0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch;patchdir=clang \ | ||
| 8 | file://0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch;patchdir=clang \ | ||
| 9 | file://0003-OpenCL-Fix-assertion-due-to-blocks.patch;patchdir=clang \ | ||
| 10 | file://0001-dont-export-targets-for-binaries.patch \ | ||
| 11 | file://0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch;patchdir=llvm/projects/llvm-spirv \ | ||
| 12 | " | ||
| 13 | |||
| 14 | SRCREV_opencl-clang = "daf5e4dd718477ae8cf89a283c653939d9182f15" | ||
| 15 | SRCREV_spirv = "bd0f28fb92061d49c0f120b4dac3fd8956006745" | ||
| 16 | |||
