diff options
| author | Naveen Saini <naveen.kumar.saini@intel.com> | 2019-09-12 22:21:43 +0800 |
|---|---|---|
| committer | Anuj Mittal <anuj.mittal@intel.com> | 2019-09-13 16:17:22 +0800 |
| commit | 1b076fd8dba94042b76e37f18f2d3bc74cbe717b (patch) | |
| tree | e7475c621763efc21fedf9639a98561f8a2dcb37 /dynamic-layers/clang-layer | |
| parent | 420247ef2db5c08ade2a34d4170f8db8aa7bc439 (diff) | |
| download | meta-intel-1b076fd8dba94042b76e37f18f2d3bc74cbe717b.tar.gz | |
llvm-project-source: update SPIRV-LLVM-Translator 8.0.0 -> 9.0.0
Remove all the backported patches which are available in the 9.0.0 release.
A few patches were recommended from the llvm-patches repo:
https://github.com/intel/intel-graphics-compiler/blob/master/documentation/build_ubuntu.md
https://github.com/intel/llvm-patches/commit/3906cc086f675847ca99b08107d18e083803d53c
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
Diffstat (limited to 'dynamic-layers/clang-layer')
10 files changed, 285 insertions, 1552 deletions
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch deleted file mode 100644 index 1c491402..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch +++ /dev/null | |||
| @@ -1,156 +0,0 @@ | |||
| 1 | From 39a3ac0065c23d1e2d55dfd8792cc28a146a4307 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Alexey Bader <alexey.bader@intel.com> | ||
| 3 | Date: Tue, 19 Feb 2019 15:19:06 +0000 | ||
| 4 | Subject: [PATCH 1/2] [OpenCL] Change type of block pointer for OpenCL | ||
| 5 | |||
| 6 | Summary: | ||
| 7 | |||
| 8 | For some reason OpenCL blocks in LLVM IR are represented as function pointers. | ||
| 9 | These pointers do not point to any real function and never get called. Actually | ||
| 10 | they point to some structure, which in turn contains pointer to the real block | ||
| 11 | invoke function. | ||
| 12 | This patch changes represntation of OpenCL blocks in LLVM IR from function | ||
| 13 | pointers to pointers to `%struct.__block_literal_generic`. | ||
| 14 | Such representation allows to avoid unnecessary bitcasts and simplifies | ||
| 15 | further processing (e.g. translation to SPIR-V ) of the module for targets | ||
| 16 | which do not support function pointers. | ||
| 17 | |||
| 18 | Patch by: Alexey Sotkin. | ||
| 19 | |||
| 20 | Reviewers: Anastasia, yaxunl, svenvh | ||
| 21 | |||
| 22 | Reviewed By: Anastasia | ||
| 23 | |||
| 24 | Subscribers: alexbatashev, cfe-commits | ||
| 25 | |||
| 26 | Tags: #clang | ||
| 27 | |||
| 28 | Differential Revision: https://reviews.llvm.org/D58277 | ||
| 29 | |||
| 30 | git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354337 91177308-0d34-0410-b5e6-96231b3b80d8 | ||
| 31 | |||
| 32 | Upstream-Status: Backport | ||
| 33 | [https://github.com/llvm-mirror/clang/commit/283f308bdb5893bab1f36791711346e746045f94] | ||
| 34 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
| 35 | --- | ||
| 36 | lib/CodeGen/CodeGenTypes.cpp | 4 +++- | ||
| 37 | test/CodeGenOpenCL/blocks.cl | 18 ++++++++---------- | ||
| 38 | test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 18 +++++++++--------- | ||
| 39 | 3 files changed, 20 insertions(+), 20 deletions(-) | ||
| 40 | |||
| 41 | diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp | ||
| 42 | index 2acf1ac..93b3ebf 100644 | ||
| 43 | --- a/lib/CodeGen/CodeGenTypes.cpp | ||
| 44 | +++ b/lib/CodeGen/CodeGenTypes.cpp | ||
| 45 | @@ -637,7 +637,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { | ||
| 46 | |||
| 47 | case Type::BlockPointer: { | ||
| 48 | const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType(); | ||
| 49 | - llvm::Type *PointeeType = ConvertTypeForMem(FTy); | ||
| 50 | + llvm::Type *PointeeType = CGM.getLangOpts().OpenCL | ||
| 51 | + ? CGM.getGenericBlockLiteralType() | ||
| 52 | + : ConvertTypeForMem(FTy); | ||
| 53 | unsigned AS = Context.getTargetAddressSpace(FTy); | ||
| 54 | ResultType = llvm::PointerType::get(PointeeType, AS); | ||
| 55 | break; | ||
| 56 | diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl | ||
| 57 | index 675240c..19aacc3 100644 | ||
| 58 | --- a/test/CodeGenOpenCL/blocks.cl | ||
| 59 | +++ b/test/CodeGenOpenCL/blocks.cl | ||
| 60 | @@ -35,11 +35,10 @@ void foo(){ | ||
| 61 | // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3 | ||
| 62 | // SPIR: %[[i_value:.*]] = load i32, i32* %i | ||
| 63 | // SPIR: store i32 %[[i_value]], i32* %[[block_captured]], | ||
| 64 | - // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()* | ||
| 65 | - // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)* | ||
| 66 | - // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]], | ||
| 67 | - // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]] | ||
| 68 | - // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* | ||
| 69 | + // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic* | ||
| 70 | + // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* | ||
| 71 | + // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]], | ||
| 72 | + // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]] | ||
| 73 | // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 | ||
| 74 | // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* | ||
| 75 | // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]] | ||
| 76 | @@ -50,11 +49,10 @@ void foo(){ | ||
| 77 | // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3 | ||
| 78 | // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i | ||
| 79 | // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]], | ||
| 80 | - // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)* | ||
| 81 | - // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()* | ||
| 82 | - // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]], | ||
| 83 | - // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]] | ||
| 84 | - // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic* | ||
| 85 | + // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)* | ||
| 86 | + // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic* | ||
| 87 | + // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]], | ||
| 88 | + // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]] | ||
| 89 | // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2 | ||
| 90 | // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8* | ||
| 91 | // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]] | ||
| 92 | diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
| 93 | index 4732194..8445016 100644 | ||
| 94 | --- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
| 95 | +++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
| 96 | @@ -11,7 +11,7 @@ typedef struct {int a;} ndrange_t; | ||
| 97 | |||
| 98 | // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself. | ||
| 99 | // COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) } | ||
| 100 | -// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*) | ||
| 101 | +// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*) | ||
| 102 | |||
| 103 | // For anonymous blocks without captures, emit block literals as global variable. | ||
| 104 | // COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } | ||
| 105 | @@ -77,9 +77,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 106 | // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue | ||
| 107 | // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags | ||
| 108 | // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke | ||
| 109 | - // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()* | ||
| 110 | - // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()* | ||
| 111 | - // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* | ||
| 112 | + // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic* | ||
| 113 | + // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic* | ||
| 114 | + // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)* | ||
| 115 | // COMMON-LABEL: call i32 @__enqueue_kernel_basic( | ||
| 116 | // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, | ||
| 117 | // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), | ||
| 118 | @@ -95,8 +95,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 119 | // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)* | ||
| 120 | // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* | ||
| 121 | // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke | ||
| 122 | - // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()* | ||
| 123 | - // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* | ||
| 124 | + // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic* | ||
| 125 | + // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)* | ||
| 126 | // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events | ||
| 127 | // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], | ||
| 128 | // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), | ||
| 129 | @@ -300,13 +300,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 130 | // Emits global block literal [[BLG8]] and invoke function [[INVG8]]. | ||
| 131 | // The full type of these expressions are long (and repeated elsewhere), so we | ||
| 132 | // capture it as part of the regex for convenience and clarity. | ||
| 133 | - // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A | ||
| 134 | + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A | ||
| 135 | void (^const block_A)(void) = ^{ | ||
| 136 | return; | ||
| 137 | }; | ||
| 138 | |||
| 139 | // Emits global block literal [[BLG9]] and invoke function [[INVG9]]. | ||
| 140 | - // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B | ||
| 141 | + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B | ||
| 142 | void (^const block_B)(local void *) = ^(local void *a) { | ||
| 143 | return; | ||
| 144 | }; | ||
| 145 | @@ -346,7 +346,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 146 | // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke | ||
| 147 | // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue | ||
| 148 | // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags | ||
| 149 | - // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)* | ||
| 150 | + // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)* | ||
| 151 | // COMMON-LABEL: call i32 @__enqueue_kernel_basic( | ||
| 152 | // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, | ||
| 153 | // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), | ||
| 154 | -- | ||
| 155 | 1.8.3.1 | ||
| 156 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch deleted file mode 100644 index 2037421b..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch +++ /dev/null | |||
| @@ -1,986 +0,0 @@ | |||
| 1 | From 177cce531fd3665bb964a03db51890e0241e3e72 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Alexey Sotkin <alexey.sotkin@intel.com> | ||
| 3 | Date: Thu, 21 Feb 2019 17:14:36 +0300 | ||
| 4 | Subject: [PATCH] Update LowerOpenCL pass to handle new blocks represntation in | ||
| 5 | LLVM IR | ||
| 6 | |||
| 7 | Upstream-Status: Backport [https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/bd6ddfaf7232cd81c7f2fe9877e66f286731bd8e] | ||
| 8 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
| 9 | |||
| 10 | --- | ||
| 11 | lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 249 ++++-------------------------- | ||
| 12 | test/global_block.ll | 71 ++++----- | ||
| 13 | test/literal-struct.ll | 31 ++-- | ||
| 14 | test/transcoding/block_w_struct_return.ll | 47 +++--- | ||
| 15 | test/transcoding/enqueue_kernel.ll | 237 ++++++++++++++++------------ | ||
| 16 | 5 files changed, 235 insertions(+), 400 deletions(-) | ||
| 17 | |||
| 18 | diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp | ||
| 19 | index c80bf04..b42a4ec 100644 | ||
| 20 | --- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp | ||
| 21 | +++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp | ||
| 22 | @@ -40,207 +40,34 @@ | ||
| 23 | // In both cases values with function type used as intermediate representation | ||
| 24 | // for block literal structure. | ||
| 25 | // | ||
| 26 | -// This pass is designed to find such cases and simplify them to avoid any | ||
| 27 | -// function pointer types occurrences in LLVM IR in 4 steps. | ||
| 28 | -// | ||
| 29 | -// 1. Find all function pointer allocas, like | ||
| 30 | -// %block = alloca void () * | ||
| 31 | -// | ||
| 32 | -// Then find a single store to that alloca: | ||
| 33 | -// %blockLit = alloca <{ i32, i32, ...}>, align 4 | ||
| 34 | -// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()* | ||
| 35 | -// > store void ()* %0, void ()** %block, align 4 | ||
| 36 | -// | ||
| 37 | -// And replace the alloca users by new instructions which used stored value | ||
| 38 | -// %blockLit itself instead of function pointer alloca %block. | ||
| 39 | -// | ||
| 40 | -// 2. Find consecutive casts from block literal type to i8 addrspace(4)* | ||
| 41 | -// used function pointers as an intermediate type: | ||
| 42 | -// %0 = bitcast <{ i32, i32 }> %block to void() * | ||
| 43 | -// %1 = addrspacecast void() * %0 to i8 addrspace(4)* | ||
| 44 | -// And simplify them: | ||
| 45 | -// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)* | ||
| 46 | -// | ||
| 47 | -// 3. Find all unused instructions with function pointer type occured after | ||
| 48 | -// pp.1-2 and remove them. | ||
| 49 | -// | ||
| 50 | -// 4. Find unused globals with function pointer type, like | ||
| 51 | -// @block = constant void ()* | ||
| 52 | -// bitcast ({ i32, i32 }* @__block_literal_global to void ()* | ||
| 53 | -// | ||
| 54 | -// And remove them. | ||
| 55 | +// In LLVM IR produced by clang, blocks are represented with the following | ||
| 56 | +// structure: | ||
| 57 | +// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
| 58 | +// Pointers to block invoke functions are stored in the third field. Clang | ||
| 59 | +// replaces inderect function calls in all cases except if block is passed as a | ||
| 60 | +// function argument. Note that it is somewhat unclear if the OpenCL C spec | ||
| 61 | +// should allow passing blocks as function argumernts. This pass is not supposed | ||
| 62 | +// to work correctly with such functions. | ||
| 63 | +// Clang though has to store function pointers to this structure. Purpose of | ||
| 64 | +// this pass is to replace store of function pointers(not allowed in SPIR-V) | ||
| 65 | +// with null pointers. | ||
| 66 | // | ||
| 67 | //===----------------------------------------------------------------------===// | ||
| 68 | #define DEBUG_TYPE "spv-lower-ocl-blocks" | ||
| 69 | |||
| 70 | -#include "OCLUtil.h" | ||
| 71 | #include "SPIRVInternal.h" | ||
| 72 | |||
| 73 | -#include "llvm/ADT/SetVector.h" | ||
| 74 | -#include "llvm/Analysis/ValueTracking.h" | ||
| 75 | -#include "llvm/IR/GlobalVariable.h" | ||
| 76 | -#include "llvm/IR/InstIterator.h" | ||
| 77 | #include "llvm/IR/Module.h" | ||
| 78 | #include "llvm/Pass.h" | ||
| 79 | -#include "llvm/PassSupport.h" | ||
| 80 | -#include "llvm/Support/Casting.h" | ||
| 81 | +#include "llvm/Support/Regex.h" | ||
| 82 | |||
| 83 | using namespace llvm; | ||
| 84 | |||
| 85 | namespace { | ||
| 86 | |||
| 87 | -static void | ||
| 88 | -removeUnusedFunctionPtrInst(Instruction *I, | ||
| 89 | - SmallSetVector<Instruction *, 16> &FuncPtrInsts) { | ||
| 90 | - for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) { | ||
| 91 | - Instruction *OpI = dyn_cast<Instruction>(I->getOperand(OpIdx)); | ||
| 92 | - I->setOperand(OpIdx, nullptr); | ||
| 93 | - if (OpI && OpI != I && OpI->user_empty()) | ||
| 94 | - FuncPtrInsts.insert(OpI); | ||
| 95 | - } | ||
| 96 | - I->eraseFromParent(); | ||
| 97 | -} | ||
| 98 | - | ||
| 99 | -static bool isFuncPtrAlloca(const AllocaInst *AI) { | ||
| 100 | - auto *ET = dyn_cast<PointerType>(AI->getAllocatedType()); | ||
| 101 | - return ET && ET->getElementType()->isFunctionTy(); | ||
| 102 | -} | ||
| 103 | - | ||
| 104 | -static bool hasFuncPtrType(const Value *V) { | ||
| 105 | - auto *PT = dyn_cast<PointerType>(V->getType()); | ||
| 106 | - return PT && PT->getElementType()->isFunctionTy(); | ||
| 107 | -} | ||
| 108 | - | ||
| 109 | -static bool isFuncPtrInst(const Instruction *I) { | ||
| 110 | - if (auto *AI = dyn_cast<AllocaInst>(I)) | ||
| 111 | - return isFuncPtrAlloca(AI); | ||
| 112 | - | ||
| 113 | - for (auto &Op : I->operands()) { | ||
| 114 | - if (auto *AI = dyn_cast<AllocaInst>(Op)) | ||
| 115 | - return isFuncPtrAlloca(AI); | ||
| 116 | - | ||
| 117 | - auto *OpI = dyn_cast<Instruction>(&Op); | ||
| 118 | - if (OpI && OpI != I && hasFuncPtrType(OpI)) | ||
| 119 | - return true; | ||
| 120 | - } | ||
| 121 | - return false; | ||
| 122 | -} | ||
| 123 | - | ||
| 124 | -static StoreInst *findSingleStore(AllocaInst *AI) { | ||
| 125 | - StoreInst *Store = nullptr; | ||
| 126 | - for (auto *U : AI->users()) { | ||
| 127 | - if (!isa<StoreInst>(U)) | ||
| 128 | - continue; // not a store | ||
| 129 | - if (Store) | ||
| 130 | - return nullptr; // there are more than one stores | ||
| 131 | - Store = dyn_cast<StoreInst>(U); | ||
| 132 | - } | ||
| 133 | - return Store; | ||
| 134 | -} | ||
| 135 | - | ||
| 136 | -static void fixFunctionPtrAllocaUsers(AllocaInst *AI) { | ||
| 137 | - // Find and remove a single store to alloca | ||
| 138 | - auto *SingleStore = findSingleStore(AI); | ||
| 139 | - assert(SingleStore && "More than one store to the function pointer alloca"); | ||
| 140 | - auto *StoredVal = SingleStore->getValueOperand(); | ||
| 141 | - SingleStore->eraseFromParent(); | ||
| 142 | - | ||
| 143 | - // Find loads from the alloca and replace thier users | ||
| 144 | - for (auto *U : AI->users()) { | ||
| 145 | - auto *LI = dyn_cast<LoadInst>(U); | ||
| 146 | - if (!LI) | ||
| 147 | - continue; | ||
| 148 | - | ||
| 149 | - for (auto *U : LI->users()) { | ||
| 150 | - auto *UInst = cast<Instruction>(U); | ||
| 151 | - auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast( | ||
| 152 | - StoredVal, UInst->getType(), "", UInst); | ||
| 153 | - UInst->replaceAllUsesWith(Cast); | ||
| 154 | - } | ||
| 155 | - } | ||
| 156 | -} | ||
| 157 | - | ||
| 158 | -static int getBlockLiteralIdx(const Function &F) { | ||
| 159 | - StringRef FName = F.getName(); | ||
| 160 | - if (isEnqueueKernelBI(FName)) | ||
| 161 | - return FName.contains("events") ? 7 : 4; | ||
| 162 | - if (isKernelQueryBI(FName)) | ||
| 163 | - return FName.contains("for_ndrange") ? 2 : 1; | ||
| 164 | - if (FName.startswith("__") && FName.contains("_block_invoke")) | ||
| 165 | - return F.hasStructRetAttr() ? 1 : 0; | ||
| 166 | - | ||
| 167 | - return -1; // No block literal argument | ||
| 168 | -} | ||
| 169 | - | ||
| 170 | -static bool hasBlockLiteralArg(const Function &F) { | ||
| 171 | - return getBlockLiteralIdx(F) != -1; | ||
| 172 | -} | ||
| 173 | - | ||
| 174 | -static bool simplifyFunctionPtrCasts(Function &F) { | ||
| 175 | - bool Changed = false; | ||
| 176 | - int BlockLiteralIdx = getBlockLiteralIdx(F); | ||
| 177 | - for (auto *U : F.users()) { | ||
| 178 | - auto *Call = dyn_cast<CallInst>(U); | ||
| 179 | - if (!Call) | ||
| 180 | - continue; | ||
| 181 | - if (Call->getFunction()->getName() == F.getName().str() + "_kernel") | ||
| 182 | - continue; // Skip block invoke function calls inside block invoke kernels | ||
| 183 | - | ||
| 184 | - const DataLayout &DL = F.getParent()->getDataLayout(); | ||
| 185 | - auto *BlockLiteral = Call->getOperand(BlockLiteralIdx); | ||
| 186 | - auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL); | ||
| 187 | - if (isa<GlobalVariable>(BlockLiteralVal)) | ||
| 188 | - continue; // nothing to do with globals | ||
| 189 | - | ||
| 190 | - auto *BlockLiteralAlloca = cast<AllocaInst>(BlockLiteralVal); | ||
| 191 | - assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() && | ||
| 192 | - "Function type shouldn't be there"); | ||
| 193 | - | ||
| 194 | - auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast( | ||
| 195 | - BlockLiteralAlloca, BlockLiteral->getType(), "", Call); | ||
| 196 | - BlockLiteral->replaceAllUsesWith(NewBlockLiteral); | ||
| 197 | - Changed |= true; | ||
| 198 | - } | ||
| 199 | - return Changed; | ||
| 200 | -} | ||
| 201 | - | ||
| 202 | -static void | ||
| 203 | -findFunctionPtrAllocas(Module &M, | ||
| 204 | - SmallVectorImpl<AllocaInst *> &FuncPtrAllocas) { | ||
| 205 | - for (auto &F : M) { | ||
| 206 | - if (F.isDeclaration()) | ||
| 207 | - continue; | ||
| 208 | - for (auto &I : instructions(F)) { | ||
| 209 | - auto *AI = dyn_cast<AllocaInst>(&I); | ||
| 210 | - if (!AI || !isFuncPtrAlloca(AI)) | ||
| 211 | - continue; | ||
| 212 | - FuncPtrAllocas.push_back(AI); | ||
| 213 | - } | ||
| 214 | - } | ||
| 215 | -} | ||
| 216 | - | ||
| 217 | -static void | ||
| 218 | -findUnusedFunctionPtrInsts(Module &M, | ||
| 219 | - SmallSetVector<Instruction *, 16> &FuncPtrInsts) { | ||
| 220 | - for (auto &F : M) { | ||
| 221 | - if (F.isDeclaration()) | ||
| 222 | - continue; | ||
| 223 | - for (auto &I : instructions(F)) | ||
| 224 | - if (I.user_empty() && isFuncPtrInst(&I)) | ||
| 225 | - FuncPtrInsts.insert(&I); | ||
| 226 | - } | ||
| 227 | -} | ||
| 228 | - | ||
| 229 | -static void | ||
| 230 | -findUnusedFunctionPtrGlbs(Module &M, | ||
| 231 | - SmallVectorImpl<GlobalVariable *> &FuncPtrGlbs) { | ||
| 232 | - for (auto &GV : M.globals()) { | ||
| 233 | - if (!GV.user_empty()) | ||
| 234 | - continue; | ||
| 235 | - auto *GVType = dyn_cast<PointerType>(GV.getType()->getElementType()); | ||
| 236 | - if (GVType && GVType->getElementType()->isFunctionTy()) | ||
| 237 | - FuncPtrGlbs.push_back(&GV); | ||
| 238 | - } | ||
| 239 | +static bool isBlockInvoke(Function &F) { | ||
| 240 | + static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$"); | ||
| 241 | + return BlockInvokeRegex.match(F.getName()); | ||
| 242 | } | ||
| 243 | |||
| 244 | class SPIRVLowerOCLBlocks : public ModulePass { | ||
| 245 | @@ -250,44 +77,24 @@ public: | ||
| 246 | |||
| 247 | bool runOnModule(Module &M) { | ||
| 248 | bool Changed = false; | ||
| 249 | - | ||
| 250 | - // 1. Find function pointer allocas and fix their users | ||
| 251 | - SmallVector<AllocaInst *, 16> FuncPtrAllocas; | ||
| 252 | - findFunctionPtrAllocas(M, FuncPtrAllocas); | ||
| 253 | - | ||
| 254 | - Changed |= !FuncPtrAllocas.empty(); | ||
| 255 | - for (auto *AI : FuncPtrAllocas) | ||
| 256 | - fixFunctionPtrAllocaUsers(AI); | ||
| 257 | - | ||
| 258 | - // 2. Simplify consecutive casts which use function pointer types | ||
| 259 | - for (auto &F : M) | ||
| 260 | - if (hasBlockLiteralArg(F)) | ||
| 261 | - Changed |= simplifyFunctionPtrCasts(F); | ||
| 262 | - | ||
| 263 | - // 3. Cleanup unused instructions with function pointer type | ||
| 264 | - // which are occured after pp. 1-2 | ||
| 265 | - SmallSetVector<Instruction *, 16> FuncPtrInsts; | ||
| 266 | - findUnusedFunctionPtrInsts(M, FuncPtrInsts); | ||
| 267 | - | ||
| 268 | - Changed |= !FuncPtrInsts.empty(); | ||
| 269 | - while (!FuncPtrInsts.empty()) { | ||
| 270 | - Instruction *I = FuncPtrInsts.pop_back_val(); | ||
| 271 | - removeUnusedFunctionPtrInst(I, FuncPtrInsts); | ||
| 272 | + for (Function &F : M) { | ||
| 273 | + if (!isBlockInvoke(F)) | ||
| 274 | + continue; | ||
| 275 | + for (User *U : F.users()) { | ||
| 276 | + if (!isa<Constant>(U)) | ||
| 277 | + continue; | ||
| 278 | + Constant *Null = Constant::getNullValue(U->getType()); | ||
| 279 | + if (U != Null) { | ||
| 280 | + U->replaceAllUsesWith(Null); | ||
| 281 | + Changed = true; | ||
| 282 | + } | ||
| 283 | + } | ||
| 284 | } | ||
| 285 | - | ||
| 286 | - // 4. Find and remove unused global variables with function pointer type | ||
| 287 | - SmallVector<GlobalVariable *, 16> FuncPtrGlbs; | ||
| 288 | - findUnusedFunctionPtrGlbs(M, FuncPtrGlbs); | ||
| 289 | - | ||
| 290 | - Changed |= !FuncPtrGlbs.empty(); | ||
| 291 | - for (auto *GV : FuncPtrGlbs) | ||
| 292 | - GV->eraseFromParent(); | ||
| 293 | - | ||
| 294 | return Changed; | ||
| 295 | } | ||
| 296 | |||
| 297 | static char ID; | ||
| 298 | -}; // class SPIRVLowerOCLBlocks | ||
| 299 | +}; | ||
| 300 | |||
| 301 | char SPIRVLowerOCLBlocks::ID = 0; | ||
| 302 | |||
| 303 | diff --git a/test/global_block.ll b/test/global_block.ll | ||
| 304 | index 4fc453b..b558213 100644 | ||
| 305 | --- a/test/global_block.ll | ||
| 306 | +++ b/test/global_block.ll | ||
| 307 | @@ -17,7 +17,7 @@ | ||
| 308 | ; RUN: spirv-val %t.spv | ||
| 309 | ; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM | ||
| 310 | |||
| 311 | -target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" | ||
| 312 | +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
| 313 | target triple = "spir-unknown-unknown" | ||
| 314 | |||
| 315 | ; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke" | ||
| 316 | @@ -27,71 +27,56 @@ target triple = "spir-unknown-unknown" | ||
| 317 | ; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]] | ||
| 318 | ; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] [[int]] | ||
| 319 | |||
| 320 | -;; This variable is not needed in SPIRV | ||
| 321 | -; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1 | ||
| 322 | -; CHECK-LLVM-NOT: @block_kernel.b1 | ||
| 323 | -@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) addrspace(4)*), align 8 | ||
| 324 | +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
| 325 | |||
| 326 | -@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
| 327 | +@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4 | ||
| 328 | +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 | ||
| 329 | |||
| 330 | -; Function Attrs: convergent nounwind | ||
| 331 | -define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { | ||
| 332 | +; Function Attrs: convergent noinline nounwind optnone | ||
| 333 | +define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { | ||
| 334 | entry: | ||
| 335 | - %res.addr = alloca i32 addrspace(1)*, align 8 | ||
| 336 | - store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa !10 | ||
| 337 | - | ||
| 338 | + %res.addr = alloca i32 addrspace(1)*, align 4 | ||
| 339 | + store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4 | ||
| 340 | ; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} [[five]] | ||
| 341 | ; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 5) | ||
| 342 | - %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2 | ||
| 343 | - | ||
| 344 | - %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa !10 | ||
| 345 | - store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14 | ||
| 346 | + %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2 | ||
| 347 | + %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4 | ||
| 348 | + store i32 %call, i32 addrspace(1)* %0, align 4 | ||
| 349 | ret void | ||
| 350 | } | ||
| 351 | |||
| 352 | -; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]] | ||
| 353 | +; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]] | ||
| 354 | ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}} | ||
| 355 | ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}} | ||
| 356 | ; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 %{{.*}}) | ||
| 357 | -; Function Attrs: convergent nounwind | ||
| 358 | +; Function Attrs: convergent noinline nounwind optnone | ||
| 359 | define internal spir_func i32 @_block_invoke(i8 addrspace(4)* %.block_descriptor, i32 %i) #1 { | ||
| 360 | entry: | ||
| 361 | - %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8 | ||
| 362 | + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
| 363 | %i.addr = alloca i32, align 4 | ||
| 364 | - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8 | ||
| 365 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
| 366 | - store i32 %i, i32* %i.addr, align 4, !tbaa !14 | ||
| 367 | - %0 = load i32, i32* %i.addr, align 4, !tbaa !14 | ||
| 368 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 | ||
| 369 | + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
| 370 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
| 371 | + store i32 %i, i32* %i.addr, align 4 | ||
| 372 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 | ||
| 373 | + %0 = load i32, i32* %i.addr, align 4 | ||
| 374 | %add = add nsw i32 %0, 1 | ||
| 375 | ret i32 %add | ||
| 376 | } | ||
| 377 | |||
| 378 | -attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 379 | -attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 380 | +attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 381 | +attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 382 | attributes #2 = { convergent } | ||
| 383 | |||
| 384 | !llvm.module.flags = !{!0} | ||
| 385 | -!opencl.enable.FP_CONTRACT = !{} | ||
| 386 | !opencl.ocl.version = !{!1} | ||
| 387 | !opencl.spir.version = !{!1} | ||
| 388 | -!opencl.used.extensions = !{!2} | ||
| 389 | -!opencl.used.optional.core.features = !{!2} | ||
| 390 | -!opencl.compiler.options = !{!2} | ||
| 391 | -!llvm.ident = !{!3} | ||
| 392 | +!llvm.ident = !{!2} | ||
| 393 | |||
| 394 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
| 395 | !1 = !{i32 2, i32 0} | ||
| 396 | -!2 = !{} | ||
| 397 | -!3 = !{!"clang version 7.0.0"} | ||
| 398 | -!4 = !{i32 1} | ||
| 399 | -!5 = !{!"none"} | ||
| 400 | -!6 = !{!"int*"} | ||
| 401 | -!7 = !{!""} | ||
| 402 | -!8 = !{i1 false} | ||
| 403 | -!9 = !{i32 0} | ||
| 404 | -!10 = !{!11, !11, i64 0} | ||
| 405 | -!11 = !{!"any pointer", !12, i64 0} | ||
| 406 | -!12 = !{!"omnipotent char", !13, i64 0} | ||
| 407 | -!13 = !{!"Simple C/C++ TBAA"} | ||
| 408 | -!14 = !{!15, !15, i64 0} | ||
| 409 | -!15 = !{!"int", !12, i64 0} | ||
| 410 | +!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} | ||
| 411 | +!3 = !{i32 1} | ||
| 412 | +!4 = !{!"none"} | ||
| 413 | +!5 = !{!"int*"} | ||
| 414 | +!6 = !{!""} | ||
| 415 | diff --git a/test/literal-struct.ll b/test/literal-struct.ll | ||
| 416 | index b88187f..dec957a 100644 | ||
| 417 | --- a/test/literal-struct.ll | ||
| 418 | +++ b/test/literal-struct.ll | ||
| 419 | @@ -2,7 +2,7 @@ | ||
| 420 | ; structs, i.e. structs whose type has no name. Typicaly clang generate such | ||
| 421 | ; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with | ||
| 422 | ; the following command: | ||
| 423 | -; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll | ||
| 424 | +; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll | ||
| 425 | |||
| 426 | ; literal-struct.cl: | ||
| 427 | ; void foo() | ||
| 428 | @@ -17,25 +17,28 @@ | ||
| 429 | ; RUN: llvm-spirv %t.bc -o %t.spv | ||
| 430 | ; RUN: spirv-val %t.spv | ||
| 431 | |||
| 432 | -; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0 | ||
| 433 | -; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}} | ||
| 434 | +; CHECK: TypeInt [[Int:[0-9]+]] 32 0 | ||
| 435 | +; CHECK: TypeInt [[Int8:[0-9]+]] 8 0 | ||
| 436 | +; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]] | ||
| 437 | +; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]] | ||
| 438 | |||
| 439 | target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
| 440 | target triple = "spir" | ||
| 441 | |||
| 442 | -@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
| 443 | +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
| 444 | + | ||
| 445 | +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 | ||
| 446 | ; CHECK: ConstantComposite [[StructType]] | ||
| 447 | |||
| 448 | -; This is artificial case is added to cover ConstantNull instrucitions with TypeStruct. | ||
| 449 | -@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } zeroinitializer, align 4 | ||
| 450 | +@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4 | ||
| 451 | ; CHECK: ConstantNull [[StructType]] | ||
| 452 | |||
| 453 | ; Function Attrs: convergent noinline nounwind optnone | ||
| 454 | define spir_func void @foo() #0 { | ||
| 455 | entry: | ||
| 456 | - %myBlock = alloca void () addrspace(4)*, align 4 | ||
| 457 | - store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4 | ||
| 458 | - call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1 | ||
| 459 | + %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4 | ||
| 460 | + store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4 | ||
| 461 | + call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1 | ||
| 462 | ret void | ||
| 463 | } | ||
| 464 | |||
| 465 | @@ -43,14 +46,14 @@ entry: | ||
| 466 | define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) #0 { | ||
| 467 | entry: | ||
| 468 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
| 469 | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 | ||
| 470 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 | ||
| 471 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
| 472 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
| 473 | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 | ||
| 474 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
| 475 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 | ||
| 476 | ret void | ||
| 477 | } | ||
| 478 | |||
| 479 | -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 480 | +attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 481 | attributes #1 = { convergent } | ||
| 482 | |||
| 483 | !llvm.module.flags = !{!0} | ||
| 484 | @@ -60,4 +63,4 @@ attributes #1 = { convergent } | ||
| 485 | |||
| 486 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
| 487 | !1 = !{i32 2, i32 0} | ||
| 488 | -!2 = !{!"clang version 8.0.0 "} | ||
| 489 | +!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} | ||
| 490 | diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll | ||
| 491 | index a68820f..ebd2c5f 100644 | ||
| 492 | --- a/test/transcoding/block_w_struct_return.ll | ||
| 493 | +++ b/test/transcoding/block_w_struct_return.ll | ||
| 494 | @@ -16,6 +16,8 @@ | ||
| 495 | ; res[tid] = kernelBlock(aa).a - 6; | ||
| 496 | ; } | ||
| 497 | |||
| 498 | +; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll | ||
| 499 | + | ||
| 500 | ; RUN: llvm-as %s -o %t.bc | ||
| 501 | ; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt | ||
| 502 | ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV | ||
| 503 | @@ -28,12 +30,14 @@ | ||
| 504 | ; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke" | ||
| 505 | |||
| 506 | ; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32 | ||
| 507 | +; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8 | ||
| 508 | +; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]] | ||
| 509 | ; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]] | ||
| 510 | ; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]] | ||
| 511 | |||
| 512 | ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7 | ||
| 513 | ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7 | ||
| 514 | -; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}} | ||
| 515 | +; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}} | ||
| 516 | ; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] [[StructRet]] [[BlockLit]] [[StructArg]] | ||
| 517 | |||
| 518 | ; CHECK-LLVM: %[[StructA:.*]] = type { i32 } | ||
| 519 | @@ -42,20 +46,21 @@ | ||
| 520 | target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
| 521 | target triple = "spir64-unknown-unknown" | ||
| 522 | |||
| 523 | +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
| 524 | %struct.A = type { i32 } | ||
| 525 | |||
| 526 | -@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
| 527 | +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8 | ||
| 528 | |||
| 529 | ; Function Attrs: convergent noinline nounwind optnone | ||
| 530 | -define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible !8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location !7 { | ||
| 531 | +define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { | ||
| 532 | entry: | ||
| 533 | %res.addr = alloca i32 addrspace(1)*, align 8 | ||
| 534 | - %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8 | ||
| 535 | + %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 8 | ||
| 536 | %tid = alloca i64, align 8 | ||
| 537 | %aa = alloca %struct.A, align 4 | ||
| 538 | %tmp = alloca %struct.A, align 4 | ||
| 539 | store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8 | ||
| 540 | - store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void (%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void (%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) addrspace(4)** %kernelBlock, align 8 | ||
| 541 | + store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8 | ||
| 542 | %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4 | ||
| 543 | store i64 %call, i64* %tid, align 8 | ||
| 544 | %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8 | ||
| 545 | @@ -64,7 +69,7 @@ entry: | ||
| 546 | store i32 -1, i32 addrspace(1)* %arrayidx, align 4 | ||
| 547 | %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0 | ||
| 548 | store i32 5, i32* %a, align 4 | ||
| 549 | - call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5 | ||
| 550 | + call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5 | ||
| 551 | %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0 | ||
| 552 | %2 = load i32, i32* %a1, align 4 | ||
| 553 | %sub = sub nsw i32 %2, 6 | ||
| 554 | @@ -79,10 +84,10 @@ entry: | ||
| 555 | define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval align 4 %a) #1 { | ||
| 556 | entry: | ||
| 557 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8 | ||
| 558 | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8 | ||
| 559 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8 | ||
| 560 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8 | ||
| 561 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
| 562 | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 8 | ||
| 563 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
| 564 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8 | ||
| 565 | %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0 | ||
| 566 | store i32 6, i32* %a1, align 4 | ||
| 567 | %0 = bitcast %struct.A* %agg.result to i8* | ||
| 568 | @@ -97,30 +102,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r | ||
| 569 | ; Function Attrs: convergent nounwind readnone | ||
| 570 | declare spir_func i64 @_Z13get_global_idj(i32) #3 | ||
| 571 | |||
| 572 | -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 573 | -attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 574 | +attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 575 | +attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 576 | attributes #2 = { argmemonly nounwind } | ||
| 577 | attributes #3 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 578 | attributes #4 = { convergent nounwind readnone } | ||
| 579 | attributes #5 = { convergent } | ||
| 580 | |||
| 581 | !llvm.module.flags = !{!0} | ||
| 582 | -!opencl.enable.FP_CONTRACT = !{} | ||
| 583 | !opencl.ocl.version = !{!1} | ||
| 584 | !opencl.spir.version = !{!1} | ||
| 585 | -!opencl.used.extensions = !{!2} | ||
| 586 | -!opencl.used.optional.core.features = !{!2} | ||
| 587 | -!opencl.compiler.options = !{!2} | ||
| 588 | -!llvm.ident = !{!3} | ||
| 589 | +!llvm.ident = !{!2} | ||
| 590 | |||
| 591 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
| 592 | !1 = !{i32 2, i32 0} | ||
| 593 | -!2 = !{} | ||
| 594 | -!3 = !{!"clang version 7.0.0"} | ||
| 595 | -!4 = !{i32 1} | ||
| 596 | -!5 = !{!"none"} | ||
| 597 | -!6 = !{!"int*"} | ||
| 598 | -!7 = !{!""} | ||
| 599 | -!8 = !{i1 false} | ||
| 600 | -!9 = !{i32 0} | ||
| 601 | - | ||
| 602 | +!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} | ||
| 603 | +!3 = !{i32 1} | ||
| 604 | +!4 = !{!"none"} | ||
| 605 | +!5 = !{!"int*"} | ||
| 606 | +!6 = !{!""} | ||
| 607 | diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll | ||
| 608 | index 1f0b360..761043e 100644 | ||
| 609 | --- a/test/transcoding/enqueue_kernel.ll | ||
| 610 | +++ b/test/transcoding/enqueue_kernel.ll | ||
| 611 | @@ -51,11 +51,12 @@ | ||
| 612 | ; ModuleID = 'enqueue_kernel.cl' | ||
| 613 | source_filename = "enqueue_kernel.cl" | ||
| 614 | target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
| 615 | -target triple = "spir-unknown-unknown" | ||
| 616 | +target triple = "spir" | ||
| 617 | |||
| 618 | %opencl.queue_t = type opaque | ||
| 619 | %struct.ndrange_t = type { i32 } | ||
| 620 | %opencl.clk_event_t = type opaque | ||
| 621 | +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
| 622 | |||
| 623 | ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel" | ||
| 624 | ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel" | ||
| 625 | @@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown" | ||
| 626 | |||
| 627 | ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32 | ||
| 628 | ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8 | ||
| 629 | -; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8 | ||
| 630 | ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0 | ||
| 631 | -; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17 | ||
| 632 | +; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21 | ||
| 633 | ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2 | ||
| 634 | -; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20 | ||
| 635 | -; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]] | ||
| 636 | +; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8 | ||
| 637 | +; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24 | ||
| 638 | |||
| 639 | ; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}} | ||
| 640 | +; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]] | ||
| 641 | +; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]] | ||
| 642 | ; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]] | ||
| 643 | ; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]] | ||
| 644 | -; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]] | ||
| 645 | ; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]] | ||
| 646 | ; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] | ||
| 647 | ; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] | ||
| 648 | ; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] | ||
| 649 | ; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]] | ||
| 650 | |||
| 651 | -; CHECK-LLVM: [[BlockTy1:%[0-9a-z\.]+]] = type { i32, i32 } | ||
| 652 | -; CHECK-LLVM: [[BlockTy2:%[0-9a-z\.]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }> | ||
| 653 | -; CHECK-LLVM: [[BlockTy3:%[0-9a-z\.]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> | ||
| 654 | -; CHECK-LLVM: [[BlockTy4:%[0-9a-z\.]+]] = type <{ i32, i32 }> | ||
| 655 | +; CHECK-LLVM: [[BlockTy1:%[0-9a-z\.]+]] = type { i32, i32, i8 addrspace(4)* } | ||
| 656 | +; CHECK-LLVM: [[BlockTy2:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> | ||
| 657 | +; CHECK-LLVM: [[BlockTy3:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> | ||
| 658 | +; CHECK-LLVM: [[BlockTy4:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)* }> | ||
| 659 | |||
| 660 | -; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4 | ||
| 661 | -; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4 | ||
| 662 | +; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 | ||
| 663 | +; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 | ||
| 664 | |||
| 665 | -@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
| 666 | -@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
| 667 | +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4 | ||
| 668 | +@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4 | ||
| 669 | |||
| 670 | ; Function Attrs: convergent noinline nounwind optnone | ||
| 671 | -define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { | ||
| 672 | +define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { | ||
| 673 | entry: | ||
| 674 | + %a.addr = alloca i32 addrspace(1)*, align 4 | ||
| 675 | + %b.addr = alloca i32 addrspace(1)*, align 4 | ||
| 676 | + %i.addr = alloca i32, align 4 | ||
| 677 | + %c0.addr = alloca i8, align 1 | ||
| 678 | %default_queue = alloca %opencl.queue_t*, align 4 | ||
| 679 | %flags = alloca i32, align 4 | ||
| 680 | %ndrange = alloca %struct.ndrange_t, align 4 | ||
| 681 | %clk_event = alloca %opencl.clk_event_t*, align 4 | ||
| 682 | %event_wait_list = alloca %opencl.clk_event_t*, align 4 | ||
| 683 | %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4 | ||
| 684 | - %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4 | ||
| 685 | - %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4 | ||
| 686 | + %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4 | ||
| 687 | + %tmp = alloca %struct.ndrange_t, align 4 | ||
| 688 | + %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4 | ||
| 689 | + %tmp4 = alloca %struct.ndrange_t, align 4 | ||
| 690 | %c = alloca i8, align 1 | ||
| 691 | + %tmp11 = alloca %struct.ndrange_t, align 4 | ||
| 692 | + %block_sizes = alloca [1 x i32], align 4 | ||
| 693 | + %tmp12 = alloca %struct.ndrange_t, align 4 | ||
| 694 | + %block_sizes13 = alloca [3 x i32], align 4 | ||
| 695 | + store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4 | ||
| 696 | + store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4 | ||
| 697 | + store i32 %i, i32* %i.addr, align 4 | ||
| 698 | + store i8 %c0, i8* %c0.addr, align 1 | ||
| 699 | store i32 0, i32* %flags, align 4 | ||
| 700 | %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 | ||
| 701 | %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4 | ||
| 702 | store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4 | ||
| 703 | %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 | ||
| 704 | %2 = load i32, i32* %flags, align 4 | ||
| 705 | - %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0 | ||
| 706 | - store i32 17, i32* %block.size, align 4 | ||
| 707 | - %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1 | ||
| 708 | + %3 = bitcast %struct.ndrange_t* %tmp to i8* | ||
| 709 | + %4 = bitcast %struct.ndrange_t* %ndrange to i8* | ||
| 710 | + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false) | ||
| 711 | + %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0 | ||
| 712 | + store i32 21, i32* %block.size, align 4 | ||
| 713 | + %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1 | ||
| 714 | store i32 4, i32* %block.align, align 4 | ||
| 715 | - %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2 | ||
| 716 | - store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4 | ||
| 717 | - %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3 | ||
| 718 | - store i32 %i, i32* %block.captured1, align 4 | ||
| 719 | - %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4 | ||
| 720 | - store i8 %c0, i8* %block.captured2, align 4 | ||
| 721 | - %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()* | ||
| 722 | - %4 = addrspacecast void ()* %3 to i8 addrspace(4)* | ||
| 723 | + %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2 | ||
| 724 | + store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4 | ||
| 725 | + %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3 | ||
| 726 | + %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4 | ||
| 727 | + store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4 | ||
| 728 | + %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4 | ||
| 729 | + %6 = load i32, i32* %i.addr, align 4 | ||
| 730 | + store i32 %6, i32* %block.captured1, align 4 | ||
| 731 | + %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5 | ||
| 732 | + %7 = load i8, i8* %c0.addr, align 1 | ||
| 733 | + store i8 %7, i8* %block.captured2, align 4 | ||
| 734 | + %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic* | ||
| 735 | + %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)* | ||
| 736 | |||
| 737 | ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]] | ||
| 738 | ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} | ||
| 739 | ; [[ConstInt0]] [[EventNull]] [[EventNull]] | ||
| 740 | ; [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]] | ||
| 741 | |||
| 742 | -; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 addrspace(4)* | ||
| 743 | +; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic* | ||
| 744 | +; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)* | ||
| 745 | ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)* | ||
| 746 | -; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]]) | ||
| 747 | - | ||
| 748 | - %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4) | ||
| 749 | - %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)* | ||
| 750 | - %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* | ||
| 751 | - %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0 | ||
| 752 | - store i32 20, i32* %block.size5, align 4 | ||
| 753 | - %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1 | ||
| 754 | +; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]]) | ||
| 755 | + | ||
| 756 | + %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9) | ||
| 757 | + %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 | ||
| 758 | + %12 = load i32, i32* %flags, align 4 | ||
| 759 | + %13 = bitcast %struct.ndrange_t* %tmp4 to i8* | ||
| 760 | + %14 = bitcast %struct.ndrange_t* %ndrange to i8* | ||
| 761 | + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false) | ||
| 762 | + %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)* | ||
| 763 | + %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* | ||
| 764 | + %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0 | ||
| 765 | + store i32 24, i32* %block.size5, align 4 | ||
| 766 | + %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1 | ||
| 767 | store i32 4, i32* %block.align6, align 4 | ||
| 768 | - %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2 | ||
| 769 | - store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4 | ||
| 770 | - %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3 | ||
| 771 | - store i32 %i, i32* %block.captured8, align 4 | ||
| 772 | - %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4 | ||
| 773 | - store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4 | ||
| 774 | - %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to void ()* | ||
| 775 | - %9 = addrspacecast void ()* %8 to i8 addrspace(4)* | ||
| 776 | + %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2 | ||
| 777 | + store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4 | ||
| 778 | + %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3 | ||
| 779 | + %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4 | ||
| 780 | + store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4 | ||
| 781 | + %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4 | ||
| 782 | + %18 = load i32, i32* %i.addr, align 4 | ||
| 783 | + store i32 %18, i32* %block.captured9, align 4 | ||
| 784 | + %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5 | ||
| 785 | + %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4 | ||
| 786 | + store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4 | ||
| 787 | + %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic* | ||
| 788 | + %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)* | ||
| 789 | + | ||
| 790 | |||
| 791 | ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]] | ||
| 792 | ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]] | ||
| 793 | @@ -158,16 +193,24 @@ entry: | ||
| 794 | ; [[ConstInt2]] [[Event1]] [[Event2]] | ||
| 795 | ; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]] | ||
| 796 | |||
| 797 | -; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 addrspace(4)* | ||
| 798 | +; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to %struct.__opencl_block_literal_generic* | ||
| 799 | +; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4) | ||
| 800 | ; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)* | ||
| 801 | -; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]]) | ||
| 802 | - | ||
| 803 | - %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9) | ||
| 804 | - %11 = alloca [1 x i32] | ||
| 805 | - %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0 | ||
| 806 | - %13 = load i8, i8* %c, align 1 | ||
| 807 | - %14 = zext i8 %13 to i32 | ||
| 808 | - store i32 %14, i32* %12, align 4 | ||
| 809 | +; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]]) | ||
| 810 | + | ||
| 811 | + %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21) | ||
| 812 | + %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 | ||
| 813 | + %24 = load i32, i32* %flags, align 4 | ||
| 814 | + %25 = bitcast %struct.ndrange_t* %tmp11 to i8* | ||
| 815 | + %26 = bitcast %struct.ndrange_t* %ndrange to i8* | ||
| 816 | + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false) | ||
| 817 | + %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 | ||
| 818 | + %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)* | ||
| 819 | + %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* | ||
| 820 | + %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0 | ||
| 821 | + %30 = load i8, i8* %c, align 1 | ||
| 822 | + %31 = zext i8 %30 to i32 | ||
| 823 | + store i32 %31, i32* %29, align 4 | ||
| 824 | |||
| 825 | ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]] | ||
| 826 | ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]] | ||
| 827 | @@ -182,14 +225,18 @@ entry: | ||
| 828 | ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)* | ||
| 829 | ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}}) | ||
| 830 | |||
| 831 | - %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %12) | ||
| 832 | - %16 = alloca [3 x i32] | ||
| 833 | - %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0 | ||
| 834 | - store i32 1, i32* %17, align 4 | ||
| 835 | - %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1 | ||
| 836 | - store i32 2, i32* %18, align 4 | ||
| 837 | - %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2 | ||
| 838 | - store i32 4, i32* %19, align 4 | ||
| 839 | + %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29) | ||
| 840 | + %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 | ||
| 841 | + %34 = load i32, i32* %flags, align 4 | ||
| 842 | + %35 = bitcast %struct.ndrange_t* %tmp12 to i8* | ||
| 843 | + %36 = bitcast %struct.ndrange_t* %ndrange to i8* | ||
| 844 | + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false) | ||
| 845 | + %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0 | ||
| 846 | + store i32 1, i32* %37, align 4 | ||
| 847 | + %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1 | ||
| 848 | + store i32 2, i32* %38, align 4 | ||
| 849 | + %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2 | ||
| 850 | + store i32 4, i32* %39, align 4 | ||
| 851 | |||
| 852 | ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]] | ||
| 853 | ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]] | ||
| 854 | @@ -206,24 +253,27 @@ entry: | ||
| 855 | ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)* | ||
| 856 | ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}}) | ||
| 857 | |||
| 858 | - %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17) | ||
| 859 | + %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37) | ||
| 860 | ret void | ||
| 861 | } | ||
| 862 | |||
| 863 | +; Function Attrs: argmemonly nounwind | ||
| 864 | +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1 | ||
| 865 | + | ||
| 866 | ; Function Attrs: convergent noinline nounwind optnone | ||
| 867 | define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #2 { | ||
| 868 | entry: | ||
| 869 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
| 870 | - %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4 | ||
| 871 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4 | ||
| 872 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
| 873 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* | ||
| 874 | - store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4 | ||
| 875 | - %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4 | ||
| 876 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* | ||
| 877 | + store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4 | ||
| 878 | + %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5 | ||
| 879 | %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4 | ||
| 880 | %conv = sext i8 %0 to i32 | ||
| 881 | - %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 2 | ||
| 882 | + %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3 | ||
| 883 | %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4 | ||
| 884 | - %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3 | ||
| 885 | + %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4 | ||
| 886 | %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4 | ||
| 887 | %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2 | ||
| 888 | store i32 %conv, i32 addrspace(1)* %arrayidx, align 4 | ||
| 889 | @@ -243,19 +293,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i | ||
| 890 | define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 { | ||
| 891 | entry: | ||
| 892 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
| 893 | - %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4 | ||
| 894 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4 | ||
| 895 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
| 896 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* | ||
| 897 | - store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4 | ||
| 898 | - %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 | ||
| 899 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* | ||
| 900 | + store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4 | ||
| 901 | + %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5 | ||
| 902 | %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4 | ||
| 903 | - %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 | ||
| 904 | + %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 | ||
| 905 | %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4 | ||
| 906 | %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1 | ||
| 907 | %2 = load i32, i32 addrspace(1)* %arrayidx, align 4 | ||
| 908 | - %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2 | ||
| 909 | + %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 | ||
| 910 | %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4 | ||
| 911 | - %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 | ||
| 912 | + %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 | ||
| 913 | %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4 | ||
| 914 | %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4 | ||
| 915 | store i32 %2, i32 addrspace(1)* %arrayidx4, align 4 | ||
| 916 | @@ -276,11 +326,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac | ||
| 917 | entry: | ||
| 918 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
| 919 | %p.addr = alloca i8 addrspace(3)*, align 4 | ||
| 920 | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 | ||
| 921 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 | ||
| 922 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
| 923 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
| 924 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
| 925 | store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4 | ||
| 926 | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 | ||
| 927 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 | ||
| 928 | ret void | ||
| 929 | } | ||
| 930 | |||
| 931 | @@ -300,13 +350,13 @@ entry: | ||
| 932 | %p1.addr = alloca i8 addrspace(3)*, align 4 | ||
| 933 | %p2.addr = alloca i8 addrspace(3)*, align 4 | ||
| 934 | %p3.addr = alloca i8 addrspace(3)*, align 4 | ||
| 935 | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 | ||
| 936 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 | ||
| 937 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
| 938 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
| 939 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
| 940 | store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4 | ||
| 941 | store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4 | ||
| 942 | store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4 | ||
| 943 | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 | ||
| 944 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 | ||
| 945 | ret void | ||
| 946 | } | ||
| 947 | |||
| 948 | @@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*, | ||
| 949 | ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*) | ||
| 950 | ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*) | ||
| 951 | |||
| 952 | -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 953 | +attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 954 | attributes #1 = { argmemonly nounwind } | ||
| 955 | -attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 956 | +attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
| 957 | attributes #3 = { nounwind } | ||
| 958 | |||
| 959 | !llvm.module.flags = !{!0} | ||
| 960 | -!opencl.enable.FP_CONTRACT = !{} | ||
| 961 | !opencl.ocl.version = !{!1} | ||
| 962 | !opencl.spir.version = !{!1} | ||
| 963 | -!opencl.used.extensions = !{!2} | ||
| 964 | -!opencl.used.optional.core.features = !{!2} | ||
| 965 | -!opencl.compiler.options = !{!2} | ||
| 966 | -!llvm.ident = !{!3} | ||
| 967 | +!llvm.ident = !{!2} | ||
| 968 | |||
| 969 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
| 970 | !1 = !{i32 2, i32 0} | ||
| 971 | -!2 = !{} | ||
| 972 | -!3 = !{!"clang version 7.0.0"} | ||
| 973 | -!4 = !{i32 1, i32 1, i32 0, i32 0} | ||
| 974 | -!5 = !{!"none", !"none", !"none", !"none"} | ||
| 975 | -!6 = !{!"int*", !"int*", !"int", !"char"} | ||
| 976 | -!7 = !{!"", !"", !"", !""} | ||
| 977 | -!8 = !{i1 false, i1 false, i1 false, i1 false} | ||
| 978 | -!9 = !{i32 0, i32 0, i32 0, i32 0} | ||
| 979 | +!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} | ||
| 980 | +!3 = !{i32 1, i32 1, i32 0, i32 0} | ||
| 981 | +!4 = !{!"none", !"none", !"none", !"none"} | ||
| 982 | +!5 = !{!"int*", !"int*", !"int", !"char"} | ||
| 983 | +!6 = !{!"", !"", !"", !""} | ||
| 984 | -- | ||
| 985 | 2.7.4 | ||
| 986 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch index 9d25bbad..da2475f9 100644 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch | |||
| @@ -1,38 +1,39 @@ | |||
| 1 | From 7bbd0058362ac3bb5edd7a82d43e1785810776b3 Mon Sep 17 00:00:00 2001 | 1 | From 559fb8f82295ec4dc64a132b6566939b85c1b6fe Mon Sep 17 00:00:00 2001 |
| 2 | From: Anuj Mittal <anuj.mittal@intel.com> | 2 | From: Anuj Mittal <anuj.mittal@intel.com> |
| 3 | Date: Fri, 29 Mar 2019 08:56:53 +0800 | 3 | Date: Thu, 15 Aug 2019 22:34:31 +0800 |
| 4 | Subject: [PATCH] dont export targets for binaries | 4 | Subject: [PATCH] dont export targets for binaries |
| 5 | 5 | ||
| 6 | The projects using LLVM cmake modules look for target binaries in | 6 | The projects using LLVM cmake modules look for target binaries in |
| 7 | sysroot as a result which isn't desirable in this case and isn't needed | 7 | sysroot as a result which isn't desirable in this case and isn't needed |
| 8 | either. | 8 | either. |
| 9 | 9 | ||
| 10 | Upstream-Status: Inappropriate [cross-compile specific] | 10 | Upstream-Status: Inappropriate [cross-compile specific] |
| 11 | 11 | ||
| 12 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | 12 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> |
| 13 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
| 13 | --- | 14 | --- |
| 14 | llvm/cmake/modules/AddLLVM.cmake | 9 --------- | 15 | llvm/cmake/modules/AddLLVM.cmake | 9 --------- |
| 15 | llvm/cmake/modules/TableGen.cmake | 6 ------ | 16 | llvm/cmake/modules/TableGen.cmake | 6 ------ |
| 16 | 2 files changed, 15 deletions(-) | 17 | 2 files changed, 15 deletions(-) |
| 17 | 18 | ||
| 18 | diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake | 19 | diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake |
| 19 | index 0df6845..b79f4fa 100644 | 20 | index 619e986b8aa..d2bc1a25dd9 100644 |
| 20 | --- a/llvm/cmake/modules/AddLLVM.cmake | 21 | --- a/llvm/cmake/modules/AddLLVM.cmake |
| 21 | +++ b/llvm/cmake/modules/AddLLVM.cmake | 22 | +++ b/llvm/cmake/modules/AddLLVM.cmake |
| 22 | @@ -866,12 +866,6 @@ macro(add_llvm_tool name) | 23 | @@ -898,12 +898,6 @@ macro(add_llvm_tool name) |
| 23 | 24 | ||
| 24 | if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY) | 25 | if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY) |
| 25 | if( LLVM_BUILD_TOOLS ) | 26 | if( LLVM_BUILD_TOOLS ) |
| 27 | - set(export_to_llvmexports) | ||
| 26 | - if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR | 28 | - if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR |
| 27 | - NOT LLVM_DISTRIBUTION_COMPONENTS) | 29 | - NOT LLVM_DISTRIBUTION_COMPONENTS) |
| 28 | - set(export_to_llvmexports EXPORT LLVMExports) | 30 | - set(export_to_llvmexports EXPORT LLVMExports) |
| 29 | - set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True) | 31 | - set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True) |
| 30 | - endif() | 32 | - endif() |
| 31 | - | 33 | |
| 32 | install(TARGETS ${name} | 34 | install(TARGETS ${name} |
| 33 | ${export_to_llvmexports} | 35 | ${export_to_llvmexports} |
| 34 | RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR} | 36 | @@ -917,9 +911,6 @@ macro(add_llvm_tool name) |
| 35 | @@ -884,9 +878,6 @@ macro(add_llvm_tool name) | ||
| 36 | endif() | 37 | endif() |
| 37 | endif() | 38 | endif() |
| 38 | endif() | 39 | endif() |
| @@ -43,18 +44,19 @@ index 0df6845..b79f4fa 100644 | |||
| 43 | endmacro(add_llvm_tool name) | 44 | endmacro(add_llvm_tool name) |
| 44 | 45 | ||
| 45 | diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake | 46 | diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake |
| 46 | index 3c84ae7..141219f 100644 | 47 | index 36c026b5c0f..537acd696d8 100644 |
| 47 | --- a/llvm/cmake/modules/TableGen.cmake | 48 | --- a/llvm/cmake/modules/TableGen.cmake |
| 48 | +++ b/llvm/cmake/modules/TableGen.cmake | 49 | +++ b/llvm/cmake/modules/TableGen.cmake |
| 49 | @@ -164,14 +164,8 @@ macro(add_tablegen target project) | 50 | @@ -148,15 +148,9 @@ macro(add_tablegen target project) |
| 50 | endif() | 51 | endif() |
| 51 | 52 | ||
| 52 | if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY) | 53 | if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY AND LLVM_BUILD_UTILS) |
| 54 | - set(export_to_llvmexports) | ||
| 53 | - if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR | 55 | - if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR |
| 54 | - NOT LLVM_DISTRIBUTION_COMPONENTS) | 56 | - NOT LLVM_DISTRIBUTION_COMPONENTS) |
| 55 | - set(export_to_llvmexports EXPORT LLVMExports) | 57 | - set(export_to_llvmexports EXPORT LLVMExports) |
| 56 | - endif() | 58 | - endif() |
| 57 | - | 59 | |
| 58 | install(TARGETS ${target} | 60 | install(TARGETS ${target} |
| 59 | ${export_to_llvmexports} | 61 | ${export_to_llvmexports} |
| 60 | RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR}) | 62 | RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR}) |
| @@ -62,5 +64,5 @@ index 3c84ae7..141219f 100644 | |||
| 62 | - set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${target}) | 64 | - set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${target}) |
| 63 | endmacro() | 65 | endmacro() |
| 64 | -- | 66 | -- |
| 65 | 2.7.4 | 67 | 2.17.1 |
| 66 | 68 | ||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-skip-building-tests.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-skip-building-tests.patch index c58e7d98..9a7b8445 100644 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-skip-building-tests.patch +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-skip-building-tests.patch | |||
| @@ -1,9 +1,9 @@ | |||
| 1 | From 91db4c3cf7f290a3cab5caa316fc25a60dd409f1 Mon Sep 17 00:00:00 2001 | 1 | From 48e50f06b1bbed94cdf5207587161d4bfce7366e Mon Sep 17 00:00:00 2001 |
| 2 | From: Anuj Mittal <anuj.mittal@intel.com> | 2 | From: Naveen Saini <naveen.kumar.saini@intel.com> |
| 3 | Date: Fri, 16 Aug 2019 20:25:16 +0800 | 3 | Date: Wed, 21 Aug 2019 14:35:31 +0800 |
| 4 | Subject: [PATCH] llvm-spirv: skip including tests | 4 | Subject: [PATCH] llvm-spirv: skip building tests |
| 5 | 5 | ||
| 6 | Some of these need clang to be built and since we're building this in-tree, | 6 | Some of these need clang to be built and since we're building this in-tree, |
| 7 | that leads to problems when compiling libcxx, compiler-rt which aren't built | 7 | that leads to problems when compiling libcxx, compiler-rt which aren't built |
| 8 | in-tree. | 8 | in-tree. |
| 9 | 9 | ||
| @@ -13,12 +13,13 @@ all components, disable the building of tests altogether. | |||
| 13 | Upstream-Status: Inappropriate | 13 | Upstream-Status: Inappropriate |
| 14 | 14 | ||
| 15 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | 15 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> |
| 16 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
| 16 | --- | 17 | --- |
| 17 | CMakeLists.txt | 10 ---------- | 18 | CMakeLists.txt | 10 ---------- |
| 18 | 1 file changed, 10 deletions(-) | 19 | 1 file changed, 10 deletions(-) |
| 19 | 20 | ||
| 20 | diff --git a/CMakeLists.txt b/CMakeLists.txt | 21 | diff --git a/CMakeLists.txt b/CMakeLists.txt |
| 21 | index d632a50..81ddf62 100644 | 22 | index 1208741..20ca3e6 100644 |
| 22 | --- a/CMakeLists.txt | 23 | --- a/CMakeLists.txt |
| 23 | +++ b/CMakeLists.txt | 24 | +++ b/CMakeLists.txt |
| 24 | @@ -15,13 +15,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) | 25 | @@ -15,13 +15,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) |
| @@ -32,7 +33,7 @@ index d632a50..81ddf62 100644 | |||
| 32 | - ) | 33 | - ) |
| 33 | - endif(LLVM_INCLUDE_TESTS) | 34 | - endif(LLVM_INCLUDE_TESTS) |
| 34 | - | 35 | - |
| 35 | find_package(LLVM 8.0.0 REQUIRED | 36 | find_package(LLVM 9.0.0 REQUIRED |
| 36 | COMPONENTS | 37 | COMPONENTS |
| 37 | Analysis | 38 | Analysis |
| 38 | @@ -56,9 +49,6 @@ set(LLVM_SPIRV_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) | 39 | @@ -56,9 +49,6 @@ set(LLVM_SPIRV_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) |
| @@ -46,5 +47,5 @@ index d632a50..81ddf62 100644 | |||
| 46 | install( | 47 | install( |
| 47 | FILES | 48 | FILES |
| 48 | -- | 49 | -- |
| 49 | 2.7.4 | 50 | 2.17.1 |
| 50 | 51 | ||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch deleted file mode 100644 index 2e935a13..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch +++ /dev/null | |||
| @@ -1,294 +0,0 @@ | |||
| 1 | From c94ec28600255098ffb9d73d1b386a7c8a535590 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Andrew Savonichev <andrew.savonichev@intel.com> | ||
| 3 | Date: Thu, 21 Feb 2019 11:02:10 +0000 | ||
| 4 | Subject: [PATCH 2/2] [OpenCL] Simplify LLVM IR generated for OpenCL blocks | ||
| 5 | |||
| 6 | Summary: | ||
| 7 | Emit direct call of block invoke functions when possible, i.e. in case the | ||
| 8 | block is not passed as a function argument. | ||
| 9 | Also doing some refactoring of `CodeGenFunction::EmitBlockCallExpr()` | ||
| 10 | |||
| 11 | Reviewers: Anastasia, yaxunl, svenvh | ||
| 12 | |||
| 13 | Reviewed By: Anastasia | ||
| 14 | |||
| 15 | Subscribers: cfe-commits | ||
| 16 | |||
| 17 | Tags: #clang | ||
| 18 | |||
| 19 | Differential Revision: https://reviews.llvm.org/D58388 | ||
| 20 | |||
| 21 | git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354568 91177308-0d34-0410-b5e6-96231b3b80d8 | ||
| 22 | |||
| 23 | Upstream-Status: Backport | ||
| 24 | [https://github.com/llvm-mirror/clang/commit/eae71f8d05ce550c4e2595c9b7082cc2c7882c58] | ||
| 25 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
| 26 | --- | ||
| 27 | lib/CodeGen/CGBlocks.cpp | 77 +++++++++++++------------- | ||
| 28 | lib/CodeGen/CGOpenCLRuntime.cpp | 30 +++++++--- | ||
| 29 | lib/CodeGen/CGOpenCLRuntime.h | 4 ++ | ||
| 30 | test/CodeGenOpenCL/blocks.cl | 10 +--- | ||
| 31 | test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 34 +++++++++--- | ||
| 32 | 5 files changed, 91 insertions(+), 64 deletions(-) | ||
| 33 | |||
| 34 | diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp | ||
| 35 | index fa3c3ee..10a0238 100644 | ||
| 36 | --- a/lib/CodeGen/CGBlocks.cpp | ||
| 37 | +++ b/lib/CodeGen/CGBlocks.cpp | ||
| 38 | @@ -1261,52 +1261,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, | ||
| 39 | ReturnValueSlot ReturnValue) { | ||
| 40 | const BlockPointerType *BPT = | ||
| 41 | E->getCallee()->getType()->getAs<BlockPointerType>(); | ||
| 42 | - | ||
| 43 | llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); | ||
| 44 | - | ||
| 45 | - // Get a pointer to the generic block literal. | ||
| 46 | - // For OpenCL we generate generic AS void ptr to be able to reuse the same | ||
| 47 | - // block definition for blocks with captures generated as private AS local | ||
| 48 | - // variables and without captures generated as global AS program scope | ||
| 49 | - // variables. | ||
| 50 | - unsigned AddrSpace = 0; | ||
| 51 | - if (getLangOpts().OpenCL) | ||
| 52 | - AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); | ||
| 53 | - | ||
| 54 | - llvm::Type *BlockLiteralTy = | ||
| 55 | - llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); | ||
| 56 | - | ||
| 57 | - // Bitcast the callee to a block literal. | ||
| 58 | - BlockPtr = | ||
| 59 | - Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); | ||
| 60 | - | ||
| 61 | - // Get the function pointer from the literal. | ||
| 62 | - llvm::Value *FuncPtr = | ||
| 63 | - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, | ||
| 64 | - CGM.getLangOpts().OpenCL ? 2 : 3); | ||
| 65 | - | ||
| 66 | - // Add the block literal. | ||
| 67 | + llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType(); | ||
| 68 | + llvm::Value *Func = nullptr; | ||
| 69 | + QualType FnType = BPT->getPointeeType(); | ||
| 70 | + ASTContext &Ctx = getContext(); | ||
| 71 | CallArgList Args; | ||
| 72 | |||
| 73 | - QualType VoidPtrQualTy = getContext().VoidPtrTy; | ||
| 74 | - llvm::Type *GenericVoidPtrTy = VoidPtrTy; | ||
| 75 | if (getLangOpts().OpenCL) { | ||
| 76 | - GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType(); | ||
| 77 | - VoidPtrQualTy = | ||
| 78 | - getContext().getPointerType(getContext().getAddrSpaceQualType( | ||
| 79 | - getContext().VoidTy, LangAS::opencl_generic)); | ||
| 80 | - } | ||
| 81 | - | ||
| 82 | - BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy); | ||
| 83 | - Args.add(RValue::get(BlockPtr), VoidPtrQualTy); | ||
| 84 | - | ||
| 85 | - QualType FnType = BPT->getPointeeType(); | ||
| 86 | + // For OpenCL, BlockPtr is already casted to generic block literal. | ||
| 87 | + | ||
| 88 | + // First argument of a block call is a generic block literal casted to | ||
| 89 | + // generic void pointer, i.e. i8 addrspace(4)* | ||
| 90 | + llvm::Value *BlockDescriptor = Builder.CreatePointerCast( | ||
| 91 | + BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType()); | ||
| 92 | + QualType VoidPtrQualTy = Ctx.getPointerType( | ||
| 93 | + Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic)); | ||
| 94 | + Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy); | ||
| 95 | + // And the rest of the arguments. | ||
| 96 | + EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); | ||
| 97 | + | ||
| 98 | + // We *can* call the block directly unless it is a function argument. | ||
| 99 | + if (!isa<ParmVarDecl>(E->getCalleeDecl())) | ||
| 100 | + Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); | ||
| 101 | + else { | ||
| 102 | + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2); | ||
| 103 | + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); | ||
| 104 | + } | ||
| 105 | + } else { | ||
| 106 | + // Bitcast the block literal to a generic block literal. | ||
| 107 | + BlockPtr = Builder.CreatePointerCast( | ||
| 108 | + BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal"); | ||
| 109 | + // Get pointer to the block invoke function | ||
| 110 | + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3); | ||
| 111 | |||
| 112 | - // And the rest of the arguments. | ||
| 113 | - EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); | ||
| 114 | + // First argument is a block literal casted to a void pointer | ||
| 115 | + BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy); | ||
| 116 | + Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy); | ||
| 117 | + // And the rest of the arguments. | ||
| 118 | + EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); | ||
| 119 | |||
| 120 | - // Load the function. | ||
| 121 | - llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); | ||
| 122 | + // Load the function. | ||
| 123 | + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); | ||
| 124 | + } | ||
| 125 | |||
| 126 | const FunctionType *FuncTy = FnType->castAs<FunctionType>(); | ||
| 127 | const CGFunctionInfo &FnInfo = | ||
| 128 | diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp | ||
| 129 | index 7f6f595..75003e5 100644 | ||
| 130 | --- a/lib/CodeGen/CGOpenCLRuntime.cpp | ||
| 131 | +++ b/lib/CodeGen/CGOpenCLRuntime.cpp | ||
| 132 | @@ -123,6 +123,23 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() { | ||
| 133 | CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); | ||
| 134 | } | ||
| 135 | |||
| 136 | +// Get the block literal from an expression derived from the block expression. | ||
| 137 | +// OpenCL v2.0 s6.12.5: | ||
| 138 | +// Block variable declarations are implicitly qualified with const. Therefore | ||
| 139 | +// all block variables must be initialized at declaration time and may not be | ||
| 140 | +// reassigned. | ||
| 141 | +static const BlockExpr *getBlockExpr(const Expr *E) { | ||
| 142 | + const Expr *Prev = nullptr; // to make sure we do not stuck in infinite loop. | ||
| 143 | + while(!isa<BlockExpr>(E) && E != Prev) { | ||
| 144 | + Prev = E; | ||
| 145 | + E = E->IgnoreCasts(); | ||
| 146 | + if (auto DR = dyn_cast<DeclRefExpr>(E)) { | ||
| 147 | + E = cast<VarDecl>(DR->getDecl())->getInit(); | ||
| 148 | + } | ||
| 149 | + } | ||
| 150 | + return cast<BlockExpr>(E); | ||
| 151 | +} | ||
| 152 | + | ||
| 153 | /// Record emitted llvm invoke function and llvm block literal for the | ||
| 154 | /// corresponding block expression. | ||
| 155 | void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, | ||
| 156 | @@ -137,20 +154,17 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, | ||
| 157 | EnqueuedBlockMap[E].Kernel = nullptr; | ||
| 158 | } | ||
| 159 | |||
| 160 | +llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) { | ||
| 161 | + return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc; | ||
| 162 | +} | ||
| 163 | + | ||
| 164 | CGOpenCLRuntime::EnqueuedBlockInfo | ||
| 165 | CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { | ||
| 166 | CGF.EmitScalarExpr(E); | ||
| 167 | |||
| 168 | // The block literal may be assigned to a const variable. Chasing down | ||
| 169 | // to get the block literal. | ||
| 170 | - if (auto DR = dyn_cast<DeclRefExpr>(E)) { | ||
| 171 | - E = cast<VarDecl>(DR->getDecl())->getInit(); | ||
| 172 | - } | ||
| 173 | - E = E->IgnoreImplicit(); | ||
| 174 | - if (auto Cast = dyn_cast<CastExpr>(E)) { | ||
| 175 | - E = Cast->getSubExpr(); | ||
| 176 | - } | ||
| 177 | - auto *Block = cast<BlockExpr>(E); | ||
| 178 | + const BlockExpr *Block = getBlockExpr(E); | ||
| 179 | |||
| 180 | assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && | ||
| 181 | "Block expression not emitted"); | ||
| 182 | diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h | ||
| 183 | index 750721f..4effc7e 100644 | ||
| 184 | --- a/lib/CodeGen/CGOpenCLRuntime.h | ||
| 185 | +++ b/lib/CodeGen/CGOpenCLRuntime.h | ||
| 186 | @@ -92,6 +92,10 @@ public: | ||
| 187 | /// \param Block block literal emitted for the block expression. | ||
| 188 | void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, | ||
| 189 | llvm::Value *Block); | ||
| 190 | + | ||
| 191 | + /// \return LLVM block invoke function emitted for an expression derived from | ||
| 192 | + /// the block expression. | ||
| 193 | + llvm::Function *getInvokeFunction(const Expr *E); | ||
| 194 | }; | ||
| 195 | |||
| 196 | } | ||
| 197 | diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl | ||
| 198 | index 19aacc3..ab5a2c6 100644 | ||
| 199 | --- a/test/CodeGenOpenCL/blocks.cl | ||
| 200 | +++ b/test/CodeGenOpenCL/blocks.cl | ||
| 201 | @@ -39,11 +39,8 @@ void foo(){ | ||
| 202 | // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* | ||
| 203 | // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]], | ||
| 204 | // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]] | ||
| 205 | - // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 | ||
| 206 | // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* | ||
| 207 | - // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]] | ||
| 208 | - // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)* | ||
| 209 | - // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]]) | ||
| 210 | + // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]]) | ||
| 211 | // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2 | ||
| 212 | // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]] | ||
| 213 | // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3 | ||
| 214 | @@ -53,11 +50,8 @@ void foo(){ | ||
| 215 | // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic* | ||
| 216 | // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]], | ||
| 217 | // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]] | ||
| 218 | - // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2 | ||
| 219 | // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8* | ||
| 220 | - // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]] | ||
| 221 | - // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)* | ||
| 222 | - // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]]) | ||
| 223 | + // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]]) | ||
| 224 | |||
| 225 | int (^ block_B)(void) = ^{ | ||
| 226 | return i; | ||
| 227 | diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
| 228 | index 8445016..1566912 100644 | ||
| 229 | --- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
| 230 | +++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
| 231 | @@ -312,9 +312,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 232 | }; | ||
| 233 | |||
| 234 | // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. | ||
| 235 | - // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) | ||
| 236 | - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* | ||
| 237 | - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
| 238 | + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
| 239 | block_A(); | ||
| 240 | |||
| 241 | // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]]. | ||
| 242 | @@ -333,15 +331,35 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 243 | unsigned size = get_kernel_work_group_size(block_A); | ||
| 244 | |||
| 245 | // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted. | ||
| 246 | - // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) | ||
| 247 | - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* | ||
| 248 | - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
| 249 | + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
| 250 | block_A(); | ||
| 251 | |||
| 252 | + // Make sure that block invoke function is resolved correctly after sequence of assignements. | ||
| 253 | + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* | ||
| 254 | + // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* | ||
| 255 | + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) | ||
| 256 | + // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*), | ||
| 257 | + // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1, | ||
| 258 | + bl_t b1 = block_G; | ||
| 259 | + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* | ||
| 260 | + // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* | ||
| 261 | + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) | ||
| 262 | + // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*), | ||
| 263 | + // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2, | ||
| 264 | + bl_t b2 = b1; | ||
| 265 | + // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* | ||
| 266 | + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) | ||
| 267 | + // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null) | ||
| 268 | + b2(0); | ||
| 269 | + // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]]. | ||
| 270 | + // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl( | ||
| 271 | + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), | ||
| 272 | + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
| 273 | + size = get_kernel_preferred_work_group_size_multiple(b2); | ||
| 274 | + | ||
| 275 | void (^block_C)(void) = ^{ | ||
| 276 | callee(i, a); | ||
| 277 | }; | ||
| 278 | - | ||
| 279 | // Emits block literal on stack and block kernel [[INVLK3]]. | ||
| 280 | // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke | ||
| 281 | // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue | ||
| 282 | @@ -404,8 +422,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
| 283 | // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}}) | ||
| 284 | // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}}) | ||
| 285 | // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}}) | ||
| 286 | +// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) | ||
| 287 | // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}}) | ||
| 288 | // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) | ||
| 289 | -// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) | ||
| 290 | // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}}) | ||
| 291 | // COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}}) | ||
| 292 | -- | ||
| 293 | 1.8.3.1 | ||
| 294 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-Remove-extra-semicolon.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-Remove-extra-semicolon.patch deleted file mode 100644 index 5f5a957e..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-Remove-extra-semicolon.patch +++ /dev/null | |||
| @@ -1,27 +0,0 @@ | |||
| 1 | From a2c093c8db7b4e3a5612d0fcce9e4fd1756d6e4b Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Alexey Sotkin <alexey.sotkin@intel.com> | ||
| 3 | Date: Mon, 5 Aug 2019 18:18:01 +0300 | ||
| 4 | Subject: [PATCH] Remove extra semicolon | ||
| 5 | |||
| 6 | Upstream-Status: Backport | ||
| 7 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
| 8 | --- | ||
| 9 | lib/SPIRV/libSPIRV/SPIRVEnum.h | 2 +- | ||
| 10 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
| 11 | |||
| 12 | diff --git a/lib/SPIRV/libSPIRV/SPIRVEnum.h b/lib/SPIRV/libSPIRV/SPIRVEnum.h | ||
| 13 | index c93a484..3a071e7 100644 | ||
| 14 | --- a/lib/SPIRV/libSPIRV/SPIRVEnum.h | ||
| 15 | +++ b/lib/SPIRV/libSPIRV/SPIRVEnum.h | ||
| 16 | @@ -124,7 +124,7 @@ template <> inline void SPIRVMap<SPIRVExtensionKind, std::string>::init() { | ||
| 17 | add(SPV_INTEL_device_side_avc_motion_estimation, | ||
| 18 | "SPV_INTEL_device_side_avc_motion_estimation"); | ||
| 19 | add(SPV_KHR_no_integer_wrap_decoration, "SPV_KHR_no_integer_wrap_decoration"); | ||
| 20 | -}; | ||
| 21 | +} | ||
| 22 | |||
| 23 | template <> inline void SPIRVMap<SPIRVExtInstSetKind, std::string>::init() { | ||
| 24 | add(SPIRVEIS_OpenCL, "OpenCL.std"); | ||
| 25 | -- | ||
| 26 | 2.7.4 | ||
| 27 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch deleted file mode 100644 index 510c7c6e..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch +++ /dev/null | |||
| @@ -1,61 +0,0 @@ | |||
| 1 | From 29e2813a2ab7d5569860bb07892dfef7b5374d96 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Yaxun Liu <Yaxun.Liu@amd.com> | ||
| 3 | Date: Tue, 26 Feb 2019 16:20:41 +0000 | ||
| 4 | Subject: [PATCH] [OpenCL] Fix assertion due to blocks | ||
| 5 | |||
| 6 | A recent change caused assertion in CodeGenFunction::EmitBlockCallExpr when a block is called. | ||
| 7 | |||
| 8 | There is code | ||
| 9 | |||
| 10 | Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); | ||
| 11 | getCalleeDecl calls Expr::getReferencedDeclOfCallee, which does not handle | ||
| 12 | BlockExpr and returns nullptr, which causes isa to assert. | ||
| 13 | |||
| 14 | This patch fixes that. | ||
| 15 | |||
| 16 | Differential Revision: https://reviews.llvm.org/D58658 | ||
| 17 | |||
| 18 | |||
| 19 | git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354893 91177308-0d34-0410-b5e6-96231b3b80d8 | ||
| 20 | |||
| 21 | Upstream-Status: Backport | ||
| 22 | [https://github.com/llvm-mirror/clang/commit/29e2813a2ab7d5569860bb07892dfef7b5374d96] | ||
| 23 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
| 24 | --- | ||
| 25 | lib/AST/Expr.cpp | 2 ++ | ||
| 26 | test/CodeGenOpenCL/blocks.cl | 6 ++++++ | ||
| 27 | 2 files changed, 8 insertions(+) | ||
| 28 | |||
| 29 | diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp | ||
| 30 | index aef1eab..85690c7 100644 | ||
| 31 | --- a/lib/AST/Expr.cpp | ||
| 32 | +++ b/lib/AST/Expr.cpp | ||
| 33 | @@ -1358,6 +1358,8 @@ Decl *Expr::getReferencedDeclOfCallee() { | ||
| 34 | return DRE->getDecl(); | ||
| 35 | if (MemberExpr *ME = dyn_cast<MemberExpr>(CEE)) | ||
| 36 | return ME->getMemberDecl(); | ||
| 37 | + if (auto *BE = dyn_cast<BlockExpr>(CEE)) | ||
| 38 | + return BE->getBlockDecl(); | ||
| 39 | |||
| 40 | return nullptr; | ||
| 41 | } | ||
| 42 | diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl | ||
| 43 | index ab5a2c6..c3e2685 100644 | ||
| 44 | --- a/test/CodeGenOpenCL/blocks.cl | ||
| 45 | +++ b/test/CodeGenOpenCL/blocks.cl | ||
| 46 | @@ -90,6 +90,12 @@ int get42() { | ||
| 47 | return blockArgFunc(^{return 42;}); | ||
| 48 | } | ||
| 49 | |||
| 50 | +// COMMON-LABEL: define {{.*}}@call_block | ||
| 51 | +// call {{.*}}@__call_block_block_invoke | ||
| 52 | +int call_block() { | ||
| 53 | + return ^int(int num) { return num; } (11); | ||
| 54 | +} | ||
| 55 | + | ||
| 56 | // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size" | ||
| 57 | // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align" | ||
| 58 | |||
| 59 | -- | ||
| 60 | 1.8.3.1 | ||
| 61 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch new file mode 100644 index 00000000..cd519971 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch | |||
| @@ -0,0 +1,111 @@ | |||
| 1 | From eeb816d95f0910bd246e37bb2bb3923acf0edf6b Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Aleksander Us <aleksander.us@intel.com> | ||
| 3 | Date: Mon, 26 Aug 2019 15:47:41 +0300 | ||
| 4 | Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in | ||
| 5 | SplitBlockPredecessors. | ||
| 6 | |||
| 7 | In case when BB is header of some loop and predecessor is latch of | ||
| 8 | this loop, metadata was not attached to newly created basic block. | ||
| 9 | This led to loss of loop metadata for other passes. | ||
| 10 | |||
| 11 | Upstream-Status: Submitted [https://reviews.llvm.org/D66892] | ||
| 12 | |||
| 13 | https://github.com/intel/llvm-patches/commit/8af4449e2d201707f7f2f832b473a0439e255f32 | ||
| 14 | |||
| 15 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
| 16 | --- | ||
| 17 | lib/Transforms/Utils/BasicBlockUtils.cpp | 23 ++++++++---- | ||
| 18 | test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++ | ||
| 19 | 2 files changed, 52 insertions(+), 7 deletions(-) | ||
| 20 | create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll | ||
| 21 | |||
| 22 | diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp | ||
| 23 | index 5fa371377c8..3a90ae061fb 100644 | ||
| 24 | --- a/lib/Transforms/Utils/BasicBlockUtils.cpp | ||
| 25 | +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp | ||
| 26 | @@ -579,24 +579,33 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, | ||
| 27 | |||
| 28 | // The new block unconditionally branches to the old block. | ||
| 29 | BranchInst *BI = BranchInst::Create(BB, NewBB); | ||
| 30 | + bool IsBBHeader = LI && LI->isLoopHeader(BB); | ||
| 31 | + Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr; | ||
| 32 | // Splitting the predecessors of a loop header creates a preheader block. | ||
| 33 | - if (LI && LI->isLoopHeader(BB)) | ||
| 34 | + if (IsBBHeader) | ||
| 35 | // Using the loop start line number prevents debuggers stepping into the | ||
| 36 | // loop body for this instruction. | ||
| 37 | - BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc()); | ||
| 38 | + BI->setDebugLoc(BBLoop->getStartLoc()); | ||
| 39 | else | ||
| 40 | BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); | ||
| 41 | |||
| 42 | // Move the edges from Preds to point to NewBB instead of BB. | ||
| 43 | - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { | ||
| 44 | + for (BasicBlock *Pred : Preds) { | ||
| 45 | + Instruction *PI = Pred->getTerminator(); | ||
| 46 | // This is slightly more strict than necessary; the minimum requirement | ||
| 47 | // is that there be no more than one indirectbr branching to BB. And | ||
| 48 | // all BlockAddress uses would need to be updated. | ||
| 49 | - assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && | ||
| 50 | + assert(!isa<IndirectBrInst>(PI) && | ||
| 51 | "Cannot split an edge from an IndirectBrInst"); | ||
| 52 | - assert(!isa<CallBrInst>(Preds[i]->getTerminator()) && | ||
| 53 | - "Cannot split an edge from a CallBrInst"); | ||
| 54 | - Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); | ||
| 55 | + assert(!isa<CallBrInst>(PI) && "Cannot split an edge from a CallBrInst"); | ||
| 56 | + if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) { | ||
| 57 | + // Update loop metadata if it exists. | ||
| 58 | + if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) { | ||
| 59 | + BI->setMetadata(LLVMContext::MD_loop, LoopMD); | ||
| 60 | + PI->setMetadata(LLVMContext::MD_loop, nullptr); | ||
| 61 | + } | ||
| 62 | + } | ||
| 63 | + PI->replaceUsesOfWith(BB, NewBB); | ||
| 64 | } | ||
| 65 | |||
| 66 | // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI | ||
| 67 | diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll | ||
| 68 | new file mode 100644 | ||
| 69 | index 00000000000..c15c92fe3ae | ||
| 70 | --- /dev/null | ||
| 71 | +++ b/test/Transforms/LoopSimplify/loop_metadata.ll | ||
| 72 | @@ -0,0 +1,36 @@ | ||
| 73 | +; RUN: opt -S -loop-simplify < %s | FileCheck %s | ||
| 74 | + | ||
| 75 | +; CHECK: for.cond.loopexit: | ||
| 76 | +; CHECK: br label %for.cond, !llvm.loop !0 | ||
| 77 | +; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit | ||
| 78 | + | ||
| 79 | +define void @foo() { | ||
| 80 | +entry: | ||
| 81 | + br label %for.cond | ||
| 82 | + | ||
| 83 | +for.cond: ; preds = %for.cond1, %entry | ||
| 84 | + %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ] | ||
| 85 | + %cmp = icmp ult i32 %j, 8 | ||
| 86 | + br i1 %cmp, label %for.body, label %for.end | ||
| 87 | + | ||
| 88 | +for.body: ; preds = %for.cond | ||
| 89 | + %dummy1 = add i32 1, 1 | ||
| 90 | + %add = add nuw nsw i32 %j, 1 | ||
| 91 | + br label %for.cond1 | ||
| 92 | + | ||
| 93 | +for.cond1: ; preds = %for.body1, %for.body | ||
| 94 | + %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ] | ||
| 95 | + %cmp1 = icmp ult i32 %i.0, 8 | ||
| 96 | + br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0 | ||
| 97 | + | ||
| 98 | +for.body1: ; preds = %for.cond1 | ||
| 99 | + %dummy2 = add i32 1, 1 | ||
| 100 | + %inc = add nuw nsw i32 %i.0, 1 | ||
| 101 | + br label %for.cond1 | ||
| 102 | + | ||
| 103 | +for.end: ; preds = %for.cond | ||
| 104 | + ret void | ||
| 105 | +} | ||
| 106 | + | ||
| 107 | +!0 = distinct !{!0, !1} | ||
| 108 | +!1 = !{!"llvm.loop.unroll.full"} | ||
| 109 | -- | ||
| 110 | 2.18.0 | ||
| 111 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch new file mode 100644 index 00000000..48307deb --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch | |||
| @@ -0,0 +1,146 @@ | |||
| 1 | From 35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Aleksander Us <aleksander.us@intel.com> | ||
| 3 | Date: Mon, 26 Aug 2019 15:45:47 +0300 | ||
| 4 | Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in | ||
| 5 | LFTR when possible. | ||
| 6 | |||
| 7 | SCEV analysis cannot properly cache instruction with poison flags | ||
| 8 | (for example, add nsw outside of loop will not be reused by expander). | ||
| 9 | This can lead to the SCEV expander generating additional instructions. | ||
| 10 | |||
| 11 | Example IR: | ||
| 12 | |||
| 13 | ... | ||
| 14 | %maxval = add nuw nsw i32 %a1, %a2 | ||
| 15 | ... | ||
| 16 | for.body: | ||
| 17 | ... | ||
| 18 | %cmp22 = icmp ult i32 %ivadd, %maxval | ||
| 19 | br i1 %cmp22, label %for.body, label %for.end | ||
| 20 | ... | ||
| 21 | |||
| 22 | SCEV expander will generate copy of %maxval in preheader but without | ||
| 23 | nuw/nsw flags. This can be avoided by explicit check that iv count | ||
| 24 | value gives the same SCEV expressions as calculated by LFTR. | ||
| 25 | |||
| 26 | Upstream-Status: Submitted [https://reviews.llvm.org/D66890] | ||
| 27 | |||
| 28 | https://github.com/intel/llvm-patches/commit/fd6a6c97341a56fd21bc32bc940afea751312e8f | ||
| 29 | |||
| 30 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
| 31 | --- | ||
| 32 | lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++- | ||
| 33 | test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++ | ||
| 34 | test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++----- | ||
| 35 | test/Transforms/IndVarSimplify/udiv.ll | 1 + | ||
| 36 | 4 files changed, 38 insertions(+), 7 deletions(-) | ||
| 37 | create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll | ||
| 38 | |||
| 39 | diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp | ||
| 40 | index f9fc698a4a9..5e04dac8aa6 100644 | ||
| 41 | --- a/lib/Transforms/Scalar/IndVarSimplify.cpp | ||
| 42 | +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp | ||
| 43 | @@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, | ||
| 44 | if (UsePostInc) | ||
| 45 | IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); | ||
| 46 | |||
| 47 | + // If computed limit is equal to old limit then do not use SCEV expander | ||
| 48 | + // because it can lose NUW/NSW flags and create extra instructions. | ||
| 49 | + BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); | ||
| 50 | + if (ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getOperand(0))) { | ||
| 51 | + Value *Limit = Cmp->getOperand(0); | ||
| 52 | + if (!L->isLoopInvariant(Limit)) | ||
| 53 | + Limit = Cmp->getOperand(1); | ||
| 54 | + if (SE->getSCEV(Limit) == IVLimit) | ||
| 55 | + return Limit; | ||
| 56 | + } | ||
| 57 | + | ||
| 58 | // Expand the code for the iteration count. | ||
| 59 | assert(SE->isLoopInvariant(IVLimit, L) && | ||
| 60 | "Computed iteration count is not loop invariant!"); | ||
| 61 | @@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, | ||
| 62 | // SCEV expression (IVInit) for a pointer type IV value (IndVar). | ||
| 63 | Type *LimitTy = ExitCount->getType()->isPointerTy() ? | ||
| 64 | IndVar->getType() : ExitCount->getType(); | ||
| 65 | - BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); | ||
| 66 | return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll | ||
| 70 | new file mode 100644 | ||
| 71 | index 00000000000..abd1cbb6c51 | ||
| 72 | --- /dev/null | ||
| 73 | +++ b/test/Transforms/IndVarSimplify/add_nsw.ll | ||
| 74 | @@ -0,0 +1,23 @@ | ||
| 75 | +; RUN: opt -indvars -S %s | FileCheck %s | ||
| 76 | + | ||
| 77 | +target datalayout = "e-p:32:32-i64:64-n8:16:32" | ||
| 78 | + | ||
| 79 | +; CHECK: for.body.preheader: | ||
| 80 | +; CHECK-NOT: add | ||
| 81 | +; CHECK: for.body: | ||
| 82 | + | ||
| 83 | +define void @foo(i32 %a1, i32 %a2) { | ||
| 84 | +entry: | ||
| 85 | + %maxval = add nuw nsw i32 %a1, %a2 | ||
| 86 | + %cmp = icmp slt i32 %maxval, 1 | ||
| 87 | + br i1 %cmp, label %for.end, label %for.body | ||
| 88 | + | ||
| 89 | +for.body: ; preds = %entry, %for.body | ||
| 90 | + %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ] | ||
| 91 | + %add31 = add nuw nsw i32 %j.02, 1 | ||
| 92 | + %cmp22 = icmp slt i32 %add31, %maxval | ||
| 93 | + br i1 %cmp22, label %for.body, label %for.end | ||
| 94 | + | ||
| 95 | +for.end: ; preds = %for.body | ||
| 96 | + ret void | ||
| 97 | +} | ||
| 98 | diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll | ||
| 99 | index 14ae9738696..509d662b767 100644 | ||
| 100 | --- a/test/Transforms/IndVarSimplify/lftr-reuse.ll | ||
| 101 | +++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll | ||
| 102 | @@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { | ||
| 103 | ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] | ||
| 104 | ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] | ||
| 105 | ; CHECK: outer.preheader: | ||
| 106 | -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1 | ||
| 107 | ; CHECK-NEXT: br label [[OUTER:%.*]] | ||
| 108 | ; CHECK: outer: | ||
| 109 | -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] | ||
| 110 | -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ] | ||
| 111 | +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] | ||
| 112 | ; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] | ||
| 113 | ; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 | ||
| 114 | ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] | ||
| 115 | @@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { | ||
| 116 | ; CHECK: inner: | ||
| 117 | ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ] | ||
| 118 | ; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 | ||
| 119 | -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]] | ||
| 120 | +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]] | ||
| 121 | ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] | ||
| 122 | ; CHECK: outer.inc.loopexit: | ||
| 123 | ; CHECK-NEXT: br label [[OUTER_INC]] | ||
| 124 | ; CHECK: outer.inc: | ||
| 125 | ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 | ||
| 126 | -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1 | ||
| 127 | -; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]] | ||
| 128 | +; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]] | ||
| 129 | ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] | ||
| 130 | ; CHECK: exit.loopexit: | ||
| 131 | ; CHECK-NEXT: br label [[EXIT]] | ||
| 132 | diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll | ||
| 133 | index b3f2c2a6a66..3530343ef4a 100644 | ||
| 134 | --- a/test/Transforms/IndVarSimplify/udiv.ll | ||
| 135 | +++ b/test/Transforms/IndVarSimplify/udiv.ll | ||
| 136 | @@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind | ||
| 137 | ; CHECK-LABEL: @foo( | ||
| 138 | ; CHECK: for.body.preheader: | ||
| 139 | ; CHECK-NOT: udiv | ||
| 140 | +; CHECK: for.body: | ||
| 141 | |||
| 142 | define void @foo(double* %p, i64 %n) nounwind { | ||
| 143 | entry: | ||
| 144 | -- | ||
| 145 | 2.18.0 | ||
| 146 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend index 50ab25e0..8a2cc37f 100644 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend | |||
| @@ -1,14 +1,11 @@ | |||
| 1 | FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:" | 1 | FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:" |
| 2 | 2 | ||
| 3 | SRC_URI_append_intel-x86-common = " \ | 3 | SRC_URI_append_intel-x86-common = " \ |
| 4 | file://0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch;patchdir=clang \ | ||
| 5 | file://0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch;patchdir=clang \ | ||
| 6 | file://0003-OpenCL-Fix-assertion-due-to-blocks.patch;patchdir=clang \ | ||
| 7 | file://0001-dont-export-targets-for-binaries.patch \ | 4 | file://0001-dont-export-targets-for-binaries.patch \ |
| 8 | git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_80;destsuffix=git/llvm/projects/llvm-spirv;name=spirv \ | 5 | file://BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch;patchdir=llvm \ |
| 9 | file://0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch;patchdir=llvm/projects/llvm-spirv \ | 6 | file://IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch;patchdir=llvm \ |
| 10 | file://0002-Remove-extra-semicolon.patch;patchdir=llvm/projects/llvm-spirv \ | 7 | git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_90;destsuffix=git/llvm/projects/llvm-spirv;name=spirv \ |
| 11 | file://0001-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \ | 8 | file://0001-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \ |
| 12 | " | 9 | " |
| 13 | 10 | ||
| 14 | SRCREV_spirv = "1d48cd84d04a2f60b43ea3f66eb7c86f4e5973a9" | 11 | SRCREV_spirv = "70420631144a6a25613ae37178f2cc1d3607b65b" |
