summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNaveen Saini <naveen.kumar.saini@intel.com>2019-09-12 22:21:43 +0800
committerAnuj Mittal <anuj.mittal@intel.com>2019-09-13 16:17:22 +0800
commit1b076fd8dba94042b76e37f18f2d3bc74cbe717b (patch)
treee7475c621763efc21fedf9639a98561f8a2dcb37
parent420247ef2db5c08ade2a34d4170f8db8aa7bc439 (diff)
downloadmeta-intel-1b076fd8dba94042b76e37f18f2d3bc74cbe717b.tar.gz
llvm-project-source: update SPIRV-LLVM-Translator 8.0.0 -> 9.0.0
Remove all the backported patches that are already included in the 9.0.0 release. A few patches were recommended by the llvm-patches repo: https://github.com/intel/intel-graphics-compiler/blob/master/documentation/build_ubuntu.md https://github.com/intel/llvm-patches/commit/3906cc086f675847ca99b08107d18e083803d53c Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch156
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch986
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch28
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-skip-building-tests.patch17
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch294
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-Remove-extra-semicolon.patch27
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch61
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch111
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch146
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend11
10 files changed, 285 insertions, 1552 deletions
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch
deleted file mode 100644
index 1c491402..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch
+++ /dev/null
@@ -1,156 +0,0 @@
1From 39a3ac0065c23d1e2d55dfd8792cc28a146a4307 Mon Sep 17 00:00:00 2001
2From: Alexey Bader <alexey.bader@intel.com>
3Date: Tue, 19 Feb 2019 15:19:06 +0000
4Subject: [PATCH 1/2] [OpenCL] Change type of block pointer for OpenCL
5
6Summary:
7
8For some reason OpenCL blocks in LLVM IR are represented as function pointers.
9These pointers do not point to any real function and never get called. Actually
10they point to some structure, which in turn contains pointer to the real block
11invoke function.
12This patch changes the representation of OpenCL blocks in LLVM IR from function
13pointers to pointers to `%struct.__block_literal_generic`.
14Such representation allows to avoid unnecessary bitcasts and simplifies
15further processing (e.g. translation to SPIR-V ) of the module for targets
16which do not support function pointers.
17
18Patch by: Alexey Sotkin.
19
20Reviewers: Anastasia, yaxunl, svenvh
21
22Reviewed By: Anastasia
23
24Subscribers: alexbatashev, cfe-commits
25
26Tags: #clang
27
28Differential Revision: https://reviews.llvm.org/D58277
29
30git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354337 91177308-0d34-0410-b5e6-96231b3b80d8
31
32Upstream-Status: Backport
33[https://github.com/llvm-mirror/clang/commit/283f308bdb5893bab1f36791711346e746045f94]
34Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
35---
36 lib/CodeGen/CodeGenTypes.cpp | 4 +++-
37 test/CodeGenOpenCL/blocks.cl | 18 ++++++++----------
38 test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 18 +++++++++---------
39 3 files changed, 20 insertions(+), 20 deletions(-)
40
41diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
42index 2acf1ac..93b3ebf 100644
43--- a/lib/CodeGen/CodeGenTypes.cpp
44+++ b/lib/CodeGen/CodeGenTypes.cpp
45@@ -637,7 +637,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
46
47 case Type::BlockPointer: {
48 const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType();
49- llvm::Type *PointeeType = ConvertTypeForMem(FTy);
50+ llvm::Type *PointeeType = CGM.getLangOpts().OpenCL
51+ ? CGM.getGenericBlockLiteralType()
52+ : ConvertTypeForMem(FTy);
53 unsigned AS = Context.getTargetAddressSpace(FTy);
54 ResultType = llvm::PointerType::get(PointeeType, AS);
55 break;
56diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
57index 675240c..19aacc3 100644
58--- a/test/CodeGenOpenCL/blocks.cl
59+++ b/test/CodeGenOpenCL/blocks.cl
60@@ -35,11 +35,10 @@ void foo(){
61 // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
62 // SPIR: %[[i_value:.*]] = load i32, i32* %i
63 // SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
64- // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
65- // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
66- // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
67- // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
68- // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
69+ // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic*
70+ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
71+ // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
72+ // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
73 // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
74 // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
75 // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
76@@ -50,11 +49,10 @@ void foo(){
77 // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
78 // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
79 // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
80- // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
81- // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
82- // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
83- // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
84- // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic*
85+ // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)*
86+ // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
87+ // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
88+ // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
89 // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
90 // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
91 // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
92diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
93index 4732194..8445016 100644
94--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
95+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
96@@ -11,7 +11,7 @@ typedef struct {int a;} ndrange_t;
97
98 // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
99 // COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
100-// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
101+// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*)
102
103 // For anonymous blocks without captures, emit block literals as global variable.
104 // COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
105@@ -77,9 +77,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
106 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
107 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
108 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
109- // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
110- // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
111- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
112+ // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic*
113+ // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic*
114+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
115 // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
116 // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
117 // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
118@@ -95,8 +95,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
119 // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
120 // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
121 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
122- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
123- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
124+ // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic*
125+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
126 // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
127 // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
128 // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
129@@ -300,13 +300,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
130 // Emits global block literal [[BLG8]] and invoke function [[INVG8]].
131 // The full type of these expressions are long (and repeated elsewhere), so we
132 // capture it as part of the regex for convenience and clarity.
133- // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
134+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A
135 void (^const block_A)(void) = ^{
136 return;
137 };
138
139 // Emits global block literal [[BLG9]] and invoke function [[INVG9]].
140- // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
141+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B
142 void (^const block_B)(local void *) = ^(local void *a) {
143 return;
144 };
145@@ -346,7 +346,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
146 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
147 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
148 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
149- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
150+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)*
151 // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
152 // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
153 // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
154--
1551.8.3.1
156
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
deleted file mode 100644
index 2037421b..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
+++ /dev/null
@@ -1,986 +0,0 @@
1From 177cce531fd3665bb964a03db51890e0241e3e72 Mon Sep 17 00:00:00 2001
2From: Alexey Sotkin <alexey.sotkin@intel.com>
3Date: Thu, 21 Feb 2019 17:14:36 +0300
4Subject: [PATCH] Update LowerOpenCL pass to handle new blocks representation in
5 LLVM IR
6
7Upstream-Status: Backport [https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/bd6ddfaf7232cd81c7f2fe9877e66f286731bd8e]
8Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
9
10---
11 lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 249 ++++--------------------------
12 test/global_block.ll | 71 ++++-----
13 test/literal-struct.ll | 31 ++--
14 test/transcoding/block_w_struct_return.ll | 47 +++---
15 test/transcoding/enqueue_kernel.ll | 237 ++++++++++++++++------------
16 5 files changed, 235 insertions(+), 400 deletions(-)
17
18diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
19index c80bf04..b42a4ec 100644
20--- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
21+++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
22@@ -40,207 +40,34 @@
23 // In both cases values with function type used as intermediate representation
24 // for block literal structure.
25 //
26-// This pass is designed to find such cases and simplify them to avoid any
27-// function pointer types occurrences in LLVM IR in 4 steps.
28-//
29-// 1. Find all function pointer allocas, like
30-// %block = alloca void () *
31-//
32-// Then find a single store to that alloca:
33-// %blockLit = alloca <{ i32, i32, ...}>, align 4
34-// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()*
35-// > store void ()* %0, void ()** %block, align 4
36-//
37-// And replace the alloca users by new instructions which used stored value
38-// %blockLit itself instead of function pointer alloca %block.
39-//
40-// 2. Find consecutive casts from block literal type to i8 addrspace(4)*
41-// used function pointers as an intermediate type:
42-// %0 = bitcast <{ i32, i32 }> %block to void() *
43-// %1 = addrspacecast void() * %0 to i8 addrspace(4)*
44-// And simplify them:
45-// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)*
46-//
47-// 3. Find all unused instructions with function pointer type occured after
48-// pp.1-2 and remove them.
49-//
50-// 4. Find unused globals with function pointer type, like
51-// @block = constant void ()*
52-// bitcast ({ i32, i32 }* @__block_literal_global to void ()*
53-//
54-// And remove them.
55+// In LLVM IR produced by clang, blocks are represented with the following
56+// structure:
57+// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
58+// Pointers to block invoke functions are stored in the third field. Clang
59+// replaces indirect function calls in all cases except if block is passed as a
60+// function argument. Note that it is somewhat unclear if the OpenCL C spec
61+// should allow passing blocks as function arguments. This pass is not supposed
62+// to work correctly with such functions.
63+// Clang though has to store function pointers to this structure. Purpose of
64+// this pass is to replace store of function pointers(not allowed in SPIR-V)
65+// with null pointers.
66 //
67 //===----------------------------------------------------------------------===//
68 #define DEBUG_TYPE "spv-lower-ocl-blocks"
69
70-#include "OCLUtil.h"
71 #include "SPIRVInternal.h"
72
73-#include "llvm/ADT/SetVector.h"
74-#include "llvm/Analysis/ValueTracking.h"
75-#include "llvm/IR/GlobalVariable.h"
76-#include "llvm/IR/InstIterator.h"
77 #include "llvm/IR/Module.h"
78 #include "llvm/Pass.h"
79-#include "llvm/PassSupport.h"
80-#include "llvm/Support/Casting.h"
81+#include "llvm/Support/Regex.h"
82
83 using namespace llvm;
84
85 namespace {
86
87-static void
88-removeUnusedFunctionPtrInst(Instruction *I,
89- SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
90- for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) {
91- Instruction *OpI = dyn_cast<Instruction>(I->getOperand(OpIdx));
92- I->setOperand(OpIdx, nullptr);
93- if (OpI && OpI != I && OpI->user_empty())
94- FuncPtrInsts.insert(OpI);
95- }
96- I->eraseFromParent();
97-}
98-
99-static bool isFuncPtrAlloca(const AllocaInst *AI) {
100- auto *ET = dyn_cast<PointerType>(AI->getAllocatedType());
101- return ET && ET->getElementType()->isFunctionTy();
102-}
103-
104-static bool hasFuncPtrType(const Value *V) {
105- auto *PT = dyn_cast<PointerType>(V->getType());
106- return PT && PT->getElementType()->isFunctionTy();
107-}
108-
109-static bool isFuncPtrInst(const Instruction *I) {
110- if (auto *AI = dyn_cast<AllocaInst>(I))
111- return isFuncPtrAlloca(AI);
112-
113- for (auto &Op : I->operands()) {
114- if (auto *AI = dyn_cast<AllocaInst>(Op))
115- return isFuncPtrAlloca(AI);
116-
117- auto *OpI = dyn_cast<Instruction>(&Op);
118- if (OpI && OpI != I && hasFuncPtrType(OpI))
119- return true;
120- }
121- return false;
122-}
123-
124-static StoreInst *findSingleStore(AllocaInst *AI) {
125- StoreInst *Store = nullptr;
126- for (auto *U : AI->users()) {
127- if (!isa<StoreInst>(U))
128- continue; // not a store
129- if (Store)
130- return nullptr; // there are more than one stores
131- Store = dyn_cast<StoreInst>(U);
132- }
133- return Store;
134-}
135-
136-static void fixFunctionPtrAllocaUsers(AllocaInst *AI) {
137- // Find and remove a single store to alloca
138- auto *SingleStore = findSingleStore(AI);
139- assert(SingleStore && "More than one store to the function pointer alloca");
140- auto *StoredVal = SingleStore->getValueOperand();
141- SingleStore->eraseFromParent();
142-
143- // Find loads from the alloca and replace thier users
144- for (auto *U : AI->users()) {
145- auto *LI = dyn_cast<LoadInst>(U);
146- if (!LI)
147- continue;
148-
149- for (auto *U : LI->users()) {
150- auto *UInst = cast<Instruction>(U);
151- auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast(
152- StoredVal, UInst->getType(), "", UInst);
153- UInst->replaceAllUsesWith(Cast);
154- }
155- }
156-}
157-
158-static int getBlockLiteralIdx(const Function &F) {
159- StringRef FName = F.getName();
160- if (isEnqueueKernelBI(FName))
161- return FName.contains("events") ? 7 : 4;
162- if (isKernelQueryBI(FName))
163- return FName.contains("for_ndrange") ? 2 : 1;
164- if (FName.startswith("__") && FName.contains("_block_invoke"))
165- return F.hasStructRetAttr() ? 1 : 0;
166-
167- return -1; // No block literal argument
168-}
169-
170-static bool hasBlockLiteralArg(const Function &F) {
171- return getBlockLiteralIdx(F) != -1;
172-}
173-
174-static bool simplifyFunctionPtrCasts(Function &F) {
175- bool Changed = false;
176- int BlockLiteralIdx = getBlockLiteralIdx(F);
177- for (auto *U : F.users()) {
178- auto *Call = dyn_cast<CallInst>(U);
179- if (!Call)
180- continue;
181- if (Call->getFunction()->getName() == F.getName().str() + "_kernel")
182- continue; // Skip block invoke function calls inside block invoke kernels
183-
184- const DataLayout &DL = F.getParent()->getDataLayout();
185- auto *BlockLiteral = Call->getOperand(BlockLiteralIdx);
186- auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL);
187- if (isa<GlobalVariable>(BlockLiteralVal))
188- continue; // nothing to do with globals
189-
190- auto *BlockLiteralAlloca = cast<AllocaInst>(BlockLiteralVal);
191- assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() &&
192- "Function type shouldn't be there");
193-
194- auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast(
195- BlockLiteralAlloca, BlockLiteral->getType(), "", Call);
196- BlockLiteral->replaceAllUsesWith(NewBlockLiteral);
197- Changed |= true;
198- }
199- return Changed;
200-}
201-
202-static void
203-findFunctionPtrAllocas(Module &M,
204- SmallVectorImpl<AllocaInst *> &FuncPtrAllocas) {
205- for (auto &F : M) {
206- if (F.isDeclaration())
207- continue;
208- for (auto &I : instructions(F)) {
209- auto *AI = dyn_cast<AllocaInst>(&I);
210- if (!AI || !isFuncPtrAlloca(AI))
211- continue;
212- FuncPtrAllocas.push_back(AI);
213- }
214- }
215-}
216-
217-static void
218-findUnusedFunctionPtrInsts(Module &M,
219- SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
220- for (auto &F : M) {
221- if (F.isDeclaration())
222- continue;
223- for (auto &I : instructions(F))
224- if (I.user_empty() && isFuncPtrInst(&I))
225- FuncPtrInsts.insert(&I);
226- }
227-}
228-
229-static void
230-findUnusedFunctionPtrGlbs(Module &M,
231- SmallVectorImpl<GlobalVariable *> &FuncPtrGlbs) {
232- for (auto &GV : M.globals()) {
233- if (!GV.user_empty())
234- continue;
235- auto *GVType = dyn_cast<PointerType>(GV.getType()->getElementType());
236- if (GVType && GVType->getElementType()->isFunctionTy())
237- FuncPtrGlbs.push_back(&GV);
238- }
239+static bool isBlockInvoke(Function &F) {
240+ static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$");
241+ return BlockInvokeRegex.match(F.getName());
242 }
243
244 class SPIRVLowerOCLBlocks : public ModulePass {
245@@ -250,44 +77,24 @@ public:
246
247 bool runOnModule(Module &M) {
248 bool Changed = false;
249-
250- // 1. Find function pointer allocas and fix their users
251- SmallVector<AllocaInst *, 16> FuncPtrAllocas;
252- findFunctionPtrAllocas(M, FuncPtrAllocas);
253-
254- Changed |= !FuncPtrAllocas.empty();
255- for (auto *AI : FuncPtrAllocas)
256- fixFunctionPtrAllocaUsers(AI);
257-
258- // 2. Simplify consecutive casts which use function pointer types
259- for (auto &F : M)
260- if (hasBlockLiteralArg(F))
261- Changed |= simplifyFunctionPtrCasts(F);
262-
263- // 3. Cleanup unused instructions with function pointer type
264- // which are occured after pp. 1-2
265- SmallSetVector<Instruction *, 16> FuncPtrInsts;
266- findUnusedFunctionPtrInsts(M, FuncPtrInsts);
267-
268- Changed |= !FuncPtrInsts.empty();
269- while (!FuncPtrInsts.empty()) {
270- Instruction *I = FuncPtrInsts.pop_back_val();
271- removeUnusedFunctionPtrInst(I, FuncPtrInsts);
272+ for (Function &F : M) {
273+ if (!isBlockInvoke(F))
274+ continue;
275+ for (User *U : F.users()) {
276+ if (!isa<Constant>(U))
277+ continue;
278+ Constant *Null = Constant::getNullValue(U->getType());
279+ if (U != Null) {
280+ U->replaceAllUsesWith(Null);
281+ Changed = true;
282+ }
283+ }
284 }
285-
286- // 4. Find and remove unused global variables with function pointer type
287- SmallVector<GlobalVariable *, 16> FuncPtrGlbs;
288- findUnusedFunctionPtrGlbs(M, FuncPtrGlbs);
289-
290- Changed |= !FuncPtrGlbs.empty();
291- for (auto *GV : FuncPtrGlbs)
292- GV->eraseFromParent();
293-
294 return Changed;
295 }
296
297 static char ID;
298-}; // class SPIRVLowerOCLBlocks
299+};
300
301 char SPIRVLowerOCLBlocks::ID = 0;
302
303diff --git a/test/global_block.ll b/test/global_block.ll
304index 4fc453b..b558213 100644
305--- a/test/global_block.ll
306+++ b/test/global_block.ll
307@@ -17,7 +17,7 @@
308 ; RUN: spirv-val %t.spv
309 ; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM
310
311-target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
312+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
313 target triple = "spir-unknown-unknown"
314
315 ; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke"
316@@ -27,71 +27,56 @@ target triple = "spir-unknown-unknown"
317 ; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]]
318 ; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] [[int]]
319
320-;; This variable is not needed in SPIRV
321-; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1
322-; CHECK-LLVM-NOT: @block_kernel.b1
323-@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) addrspace(4)*), align 8
324+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
325
326-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
327+@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4
328+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4
329
330-; Function Attrs: convergent nounwind
331-define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
332+; Function Attrs: convergent noinline nounwind optnone
333+define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
334 entry:
335- %res.addr = alloca i32 addrspace(1)*, align 8
336- store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa !10
337-
338+ %res.addr = alloca i32 addrspace(1)*, align 4
339+ store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4
340 ; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} [[five]]
341 ; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 5)
342- %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
343-
344- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa !10
345- store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14
346+ %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
347+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4
348+ store i32 %call, i32 addrspace(1)* %0, align 4
349 ret void
350 }
351
352-; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]]
353+; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]]
354 ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}}
355 ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}}
356 ; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 %{{.*}})
357-; Function Attrs: convergent nounwind
358+; Function Attrs: convergent noinline nounwind optnone
359 define internal spir_func i32 @_block_invoke(i8 addrspace(4)* %.block_descriptor, i32 %i) #1 {
360 entry:
361- %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
362+ %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
363 %i.addr = alloca i32, align 4
364- store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8
365- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
366- store i32 %i, i32* %i.addr, align 4, !tbaa !14
367- %0 = load i32, i32* %i.addr, align 4, !tbaa !14
368+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
369+ store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
370+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
371+ store i32 %i, i32* %i.addr, align 4
372+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
373+ %0 = load i32, i32* %i.addr, align 4
374 %add = add nsw i32 %0, 1
375 ret i32 %add
376 }
377
378-attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
379-attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
380+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
381+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
382 attributes #2 = { convergent }
383
384 !llvm.module.flags = !{!0}
385-!opencl.enable.FP_CONTRACT = !{}
386 !opencl.ocl.version = !{!1}
387 !opencl.spir.version = !{!1}
388-!opencl.used.extensions = !{!2}
389-!opencl.used.optional.core.features = !{!2}
390-!opencl.compiler.options = !{!2}
391-!llvm.ident = !{!3}
392+!llvm.ident = !{!2}
393
394 !0 = !{i32 1, !"wchar_size", i32 4}
395 !1 = !{i32 2, i32 0}
396-!2 = !{}
397-!3 = !{!"clang version 7.0.0"}
398-!4 = !{i32 1}
399-!5 = !{!"none"}
400-!6 = !{!"int*"}
401-!7 = !{!""}
402-!8 = !{i1 false}
403-!9 = !{i32 0}
404-!10 = !{!11, !11, i64 0}
405-!11 = !{!"any pointer", !12, i64 0}
406-!12 = !{!"omnipotent char", !13, i64 0}
407-!13 = !{!"Simple C/C++ TBAA"}
408-!14 = !{!15, !15, i64 0}
409-!15 = !{!"int", !12, i64 0}
410+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
411+!3 = !{i32 1}
412+!4 = !{!"none"}
413+!5 = !{!"int*"}
414+!6 = !{!""}
415diff --git a/test/literal-struct.ll b/test/literal-struct.ll
416index b88187f..dec957a 100644
417--- a/test/literal-struct.ll
418+++ b/test/literal-struct.ll
419@@ -2,7 +2,7 @@
420 ; structs, i.e. structs whose type has no name. Typicaly clang generate such
421 ; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with
422 ; the following command:
423-; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll
424+; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll
425
426 ; literal-struct.cl:
427 ; void foo()
428@@ -17,25 +17,28 @@
429 ; RUN: llvm-spirv %t.bc -o %t.spv
430 ; RUN: spirv-val %t.spv
431
432-; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0
433-; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}}
434+; CHECK: TypeInt [[Int:[0-9]+]] 32 0
435+; CHECK: TypeInt [[Int8:[0-9]+]] 8 0
436+; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]]
437+; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]]
438
439 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
440 target triple = "spir"
441
442-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
443+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
444+
445+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4
446 ; CHECK: ConstantComposite [[StructType]]
447
448-; This is artificial case is added to cover ConstantNull instrucitions with TypeStruct.
449-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } zeroinitializer, align 4
450+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4
451 ; CHECK: ConstantNull [[StructType]]
452
453 ; Function Attrs: convergent noinline nounwind optnone
454 define spir_func void @foo() #0 {
455 entry:
456- %myBlock = alloca void () addrspace(4)*, align 4
457- store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4
458- call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
459+ %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4
460+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4
461+ call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
462 ret void
463 }
464
465@@ -43,14 +46,14 @@ entry:
466 define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) #0 {
467 entry:
468 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
469- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
470+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
471 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
472- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
473- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
474+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
475+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
476 ret void
477 }
478
479-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
480+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
481 attributes #1 = { convergent }
482
483 !llvm.module.flags = !{!0}
484@@ -60,4 +63,4 @@ attributes #1 = { convergent }
485
486 !0 = !{i32 1, !"wchar_size", i32 4}
487 !1 = !{i32 2, i32 0}
488-!2 = !{!"clang version 8.0.0 "}
489+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
490diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll
491index a68820f..ebd2c5f 100644
492--- a/test/transcoding/block_w_struct_return.ll
493+++ b/test/transcoding/block_w_struct_return.ll
494@@ -16,6 +16,8 @@
495 ; res[tid] = kernelBlock(aa).a - 6;
496 ; }
497
498+; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll
499+
500 ; RUN: llvm-as %s -o %t.bc
501 ; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt
502 ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV
503@@ -28,12 +30,14 @@
504 ; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke"
505
506 ; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32
507+; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8
508+; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]]
509 ; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]]
510 ; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]]
511
512 ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7
513 ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7
514-; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}}
515+; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}}
516 ; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] [[StructRet]] [[BlockLit]] [[StructArg]]
517
518 ; CHECK-LLVM: %[[StructA:.*]] = type { i32 }
519@@ -42,20 +46,21 @@
520 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
521 target triple = "spir64-unknown-unknown"
522
523+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
524 %struct.A = type { i32 }
525
526-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
527+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8
528
529 ; Function Attrs: convergent noinline nounwind optnone
530-define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible !8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location !7 {
531+define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
532 entry:
533 %res.addr = alloca i32 addrspace(1)*, align 8
534- %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8
535+ %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 8
536 %tid = alloca i64, align 8
537 %aa = alloca %struct.A, align 4
538 %tmp = alloca %struct.A, align 4
539 store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8
540- store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void (%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void (%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) addrspace(4)** %kernelBlock, align 8
541+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8
542 %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4
543 store i64 %call, i64* %tid, align 8
544 %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8
545@@ -64,7 +69,7 @@ entry:
546 store i32 -1, i32 addrspace(1)* %arrayidx, align 4
547 %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0
548 store i32 5, i32* %a, align 4
549- call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
550+ call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
551 %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0
552 %2 = load i32, i32* %a1, align 4
553 %sub = sub nsw i32 %2, 6
554@@ -79,10 +84,10 @@ entry:
555 define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval align 4 %a) #1 {
556 entry:
557 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
558- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8
559+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8
560 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8
561- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
562- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 8
563+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
564+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8
565 %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0
566 store i32 6, i32* %a1, align 4
567 %0 = bitcast %struct.A* %agg.result to i8*
568@@ -97,30 +102,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r
569 ; Function Attrs: convergent nounwind readnone
570 declare spir_func i64 @_Z13get_global_idj(i32) #3
571
572-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
573-attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
574+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
575+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
576 attributes #2 = { argmemonly nounwind }
577 attributes #3 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
578 attributes #4 = { convergent nounwind readnone }
579 attributes #5 = { convergent }
580
581 !llvm.module.flags = !{!0}
582-!opencl.enable.FP_CONTRACT = !{}
583 !opencl.ocl.version = !{!1}
584 !opencl.spir.version = !{!1}
585-!opencl.used.extensions = !{!2}
586-!opencl.used.optional.core.features = !{!2}
587-!opencl.compiler.options = !{!2}
588-!llvm.ident = !{!3}
589+!llvm.ident = !{!2}
590
591 !0 = !{i32 1, !"wchar_size", i32 4}
592 !1 = !{i32 2, i32 0}
593-!2 = !{}
594-!3 = !{!"clang version 7.0.0"}
595-!4 = !{i32 1}
596-!5 = !{!"none"}
597-!6 = !{!"int*"}
598-!7 = !{!""}
599-!8 = !{i1 false}
600-!9 = !{i32 0}
601-
602+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
603+!3 = !{i32 1}
604+!4 = !{!"none"}
605+!5 = !{!"int*"}
606+!6 = !{!""}
607diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll
608index 1f0b360..761043e 100644
609--- a/test/transcoding/enqueue_kernel.ll
610+++ b/test/transcoding/enqueue_kernel.ll
611@@ -51,11 +51,12 @@
612 ; ModuleID = 'enqueue_kernel.cl'
613 source_filename = "enqueue_kernel.cl"
614 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
615-target triple = "spir-unknown-unknown"
616+target triple = "spir"
617
618 %opencl.queue_t = type opaque
619 %struct.ndrange_t = type { i32 }
620 %opencl.clk_event_t = type opaque
621+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
622
623 ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel"
624 ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel"
625@@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown"
626
627 ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32
628 ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8
629-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
630 ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0
631-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17
632+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21
633 ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2
634-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20
635-; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
636+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
637+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24
638
639 ; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}}
640+; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
641+; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
642 ; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]]
643 ; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]]
644-; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
645 ; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]]
646 ; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
647 ; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
648 ; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
649 ; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]]
650
651-; CHECK-LLVM: [[BlockTy1:%[0-9a-z\.]+]] = type { i32, i32 }
652-; CHECK-LLVM: [[BlockTy2:%[0-9a-z\.]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }>
653-; CHECK-LLVM: [[BlockTy3:%[0-9a-z\.]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
654-; CHECK-LLVM: [[BlockTy4:%[0-9a-z\.]+]] = type <{ i32, i32 }>
655+; CHECK-LLVM: [[BlockTy1:%[0-9a-z\.]+]] = type { i32, i32, i8 addrspace(4)* }
656+; CHECK-LLVM: [[BlockTy2:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>
657+; CHECK-LLVM: [[BlockTy3:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
658+; CHECK-LLVM: [[BlockTy4:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)* }>
659
660-; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
661-; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
662+; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4
663+; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4
664
665-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
666-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
667+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4
668+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4
669
670 ; Function Attrs: convergent noinline nounwind optnone
671-define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
672+define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
673 entry:
674+ %a.addr = alloca i32 addrspace(1)*, align 4
675+ %b.addr = alloca i32 addrspace(1)*, align 4
676+ %i.addr = alloca i32, align 4
677+ %c0.addr = alloca i8, align 1
678 %default_queue = alloca %opencl.queue_t*, align 4
679 %flags = alloca i32, align 4
680 %ndrange = alloca %struct.ndrange_t, align 4
681 %clk_event = alloca %opencl.clk_event_t*, align 4
682 %event_wait_list = alloca %opencl.clk_event_t*, align 4
683 %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4
684- %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4
685- %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
686+ %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4
687+ %tmp = alloca %struct.ndrange_t, align 4
688+ %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
689+ %tmp4 = alloca %struct.ndrange_t, align 4
690 %c = alloca i8, align 1
691+ %tmp11 = alloca %struct.ndrange_t, align 4
692+ %block_sizes = alloca [1 x i32], align 4
693+ %tmp12 = alloca %struct.ndrange_t, align 4
694+ %block_sizes13 = alloca [3 x i32], align 4
695+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
696+ store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4
697+ store i32 %i, i32* %i.addr, align 4
698+ store i8 %c0, i8* %c0.addr, align 1
699 store i32 0, i32* %flags, align 4
700 %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
701 %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4
702 store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4
703 %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
704 %2 = load i32, i32* %flags, align 4
705- %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
706- store i32 17, i32* %block.size, align 4
707- %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
708+ %3 = bitcast %struct.ndrange_t* %tmp to i8*
709+ %4 = bitcast %struct.ndrange_t* %ndrange to i8*
710+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false)
711+ %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
712+ store i32 21, i32* %block.size, align 4
713+ %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
714 store i32 4, i32* %block.align, align 4
715- %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
716- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4
717- %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
718- store i32 %i, i32* %block.captured1, align 4
719- %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
720- store i8 %c0, i8* %block.captured2, align 4
721- %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()*
722- %4 = addrspacecast void ()* %3 to i8 addrspace(4)*
723+ %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
724+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4
725+ %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
726+ %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
727+ store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4
728+ %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
729+ %6 = load i32, i32* %i.addr, align 4
730+ store i32 %6, i32* %block.captured1, align 4
731+ %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5
732+ %7 = load i8, i8* %c0.addr, align 1
733+ store i8 %7, i8* %block.captured2, align 4
734+ %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic*
735+ %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)*
736
737 ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]]
738 ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}}
739 ; [[ConstInt0]] [[EventNull]] [[EventNull]]
740 ; [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]]
741
742-; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 addrspace(4)*
743+; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic*
744+; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)*
745 ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)*
746-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]])
747-
748- %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4)
749- %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
750- %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
751- %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
752- store i32 20, i32* %block.size5, align 4
753- %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
754+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]])
755+
756+ %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
757+ %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
758+ %12 = load i32, i32* %flags, align 4
759+ %13 = bitcast %struct.ndrange_t* %tmp4 to i8*
760+ %14 = bitcast %struct.ndrange_t* %ndrange to i8*
761+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false)
762+ %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
763+ %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
764+ %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
765+ store i32 24, i32* %block.size5, align 4
766+ %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
767 store i32 4, i32* %block.align6, align 4
768- %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
769- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4
770- %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
771- store i32 %i, i32* %block.captured8, align 4
772- %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
773- store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4
774- %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to void ()*
775- %9 = addrspacecast void ()* %8 to i8 addrspace(4)*
776+ %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
777+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4
778+ %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
779+ %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
780+ store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4
781+ %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
782+ %18 = load i32, i32* %i.addr, align 4
783+ store i32 %18, i32* %block.captured9, align 4
784+ %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5
785+ %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4
786+ store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4
787+ %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic*
788+ %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)*
789+
790
791 ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]]
792 ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]]
793@@ -158,16 +193,24 @@ entry:
794 ; [[ConstInt2]] [[Event1]] [[Event2]]
795 ; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]]
796
797-; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 addrspace(4)*
798+; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to %struct.__opencl_block_literal_generic*
799+; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4)
800 ; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)*
801-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]])
802-
803- %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
804- %11 = alloca [1 x i32]
805- %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0
806- %13 = load i8, i8* %c, align 1
807- %14 = zext i8 %13 to i32
808- store i32 %14, i32* %12, align 4
809+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]])
810+
811+ %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21)
812+ %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
813+ %24 = load i32, i32* %flags, align 4
814+ %25 = bitcast %struct.ndrange_t* %tmp11 to i8*
815+ %26 = bitcast %struct.ndrange_t* %ndrange to i8*
816+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false)
817+ %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
818+ %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)*
819+ %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
820+ %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0
821+ %30 = load i8, i8* %c, align 1
822+ %31 = zext i8 %30 to i32
823+ store i32 %31, i32* %29, align 4
824
825 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]]
826 ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]]
827@@ -182,14 +225,18 @@ entry:
828 ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)*
829 ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}})
830
831- %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %12)
832- %16 = alloca [3 x i32]
833- %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0
834- store i32 1, i32* %17, align 4
835- %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1
836- store i32 2, i32* %18, align 4
837- %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2
838- store i32 4, i32* %19, align 4
839+ %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29)
840+ %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
841+ %34 = load i32, i32* %flags, align 4
842+ %35 = bitcast %struct.ndrange_t* %tmp12 to i8*
843+ %36 = bitcast %struct.ndrange_t* %ndrange to i8*
844+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false)
845+ %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0
846+ store i32 1, i32* %37, align 4
847+ %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1
848+ store i32 2, i32* %38, align 4
849+ %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2
850+ store i32 4, i32* %39, align 4
851
852 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]]
853 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]]
854@@ -206,24 +253,27 @@ entry:
855 ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)*
856 ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}})
857
858- %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17)
859+ %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37)
860 ret void
861 }
862
863+; Function Attrs: argmemonly nounwind
864+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1
865+
866 ; Function Attrs: convergent noinline nounwind optnone
867 define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #2 {
868 entry:
869 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
870- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
871+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
872 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
873- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
874- store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
875- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
876+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
877+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
878+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5
879 %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4
880 %conv = sext i8 %0 to i32
881- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 2
882+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
883 %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4
884- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
885+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
886 %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4
887 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2
888 store i32 %conv, i32 addrspace(1)* %arrayidx, align 4
889@@ -243,19 +293,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i
890 define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 {
891 entry:
892 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
893- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
894+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
895 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
896- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
897- store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
898- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
899+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
900+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
901+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5
902 %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4
903- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
904+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
905 %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4
906 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1
907 %2 = load i32, i32 addrspace(1)* %arrayidx, align 4
908- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2
909+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
910 %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4
911- %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
912+ %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
913 %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4
914 %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4
915 store i32 %2, i32 addrspace(1)* %arrayidx4, align 4
916@@ -276,11 +326,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac
917 entry:
918 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
919 %p.addr = alloca i8 addrspace(3)*, align 4
920- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
921+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
922 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
923- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
924+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
925 store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4
926- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
927+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
928 ret void
929 }
930
931@@ -300,13 +350,13 @@ entry:
932 %p1.addr = alloca i8 addrspace(3)*, align 4
933 %p2.addr = alloca i8 addrspace(3)*, align 4
934 %p3.addr = alloca i8 addrspace(3)*, align 4
935- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
936+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
937 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
938- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
939+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
940 store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4
941 store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4
942 store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4
943- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
944+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
945 ret void
946 }
947
948@@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*,
949 ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*)
950 ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)
951
952-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
953+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
954 attributes #1 = { argmemonly nounwind }
955-attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
956+attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
957 attributes #3 = { nounwind }
958
959 !llvm.module.flags = !{!0}
960-!opencl.enable.FP_CONTRACT = !{}
961 !opencl.ocl.version = !{!1}
962 !opencl.spir.version = !{!1}
963-!opencl.used.extensions = !{!2}
964-!opencl.used.optional.core.features = !{!2}
965-!opencl.compiler.options = !{!2}
966-!llvm.ident = !{!3}
967+!llvm.ident = !{!2}
968
969 !0 = !{i32 1, !"wchar_size", i32 4}
970 !1 = !{i32 2, i32 0}
971-!2 = !{}
972-!3 = !{!"clang version 7.0.0"}
973-!4 = !{i32 1, i32 1, i32 0, i32 0}
974-!5 = !{!"none", !"none", !"none", !"none"}
975-!6 = !{!"int*", !"int*", !"int", !"char"}
976-!7 = !{!"", !"", !"", !""}
977-!8 = !{i1 false, i1 false, i1 false, i1 false}
978-!9 = !{i32 0, i32 0, i32 0, i32 0}
979+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
980+!3 = !{i32 1, i32 1, i32 0, i32 0}
981+!4 = !{!"none", !"none", !"none", !"none"}
982+!5 = !{!"int*", !"int*", !"int", !"char"}
983+!6 = !{!"", !"", !"", !""}
984--
9852.7.4
986
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch
index 9d25bbad..da2475f9 100644
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch
@@ -1,38 +1,39 @@
1From 7bbd0058362ac3bb5edd7a82d43e1785810776b3 Mon Sep 17 00:00:00 2001 1From 559fb8f82295ec4dc64a132b6566939b85c1b6fe Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com> 2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Fri, 29 Mar 2019 08:56:53 +0800 3Date: Thu, 15 Aug 2019 22:34:31 +0800
4Subject: [PATCH] dont export targets for binaries 4Subject: [PATCH] dont export targets for binaries
5 5
6The projects using LLVM cmake modules look for target binaries in 6The projects using LLVM cmake modules look for target binaries in
7sysroot as a result which isn't desirable in this case and isn't needed 7sysroot as a result which isn't desirable in this case and isn't needed
8either. 8either.
9 9
10Upstream-Status: Inappropriate [cross-compile specific] 10Upstream-Status: Inappropriate [cross-compile specific]
11 11
12Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> 12Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
13Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
13--- 14---
14 llvm/cmake/modules/AddLLVM.cmake | 9 --------- 15 llvm/cmake/modules/AddLLVM.cmake | 9 ---------
15 llvm/cmake/modules/TableGen.cmake | 6 ------ 16 llvm/cmake/modules/TableGen.cmake | 6 ------
16 2 files changed, 15 deletions(-) 17 2 files changed, 15 deletions(-)
17 18
18diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake 19diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
19index 0df6845..b79f4fa 100644 20index 619e986b8aa..d2bc1a25dd9 100644
20--- a/llvm/cmake/modules/AddLLVM.cmake 21--- a/llvm/cmake/modules/AddLLVM.cmake
21+++ b/llvm/cmake/modules/AddLLVM.cmake 22+++ b/llvm/cmake/modules/AddLLVM.cmake
22@@ -866,12 +866,6 @@ macro(add_llvm_tool name) 23@@ -898,12 +898,6 @@ macro(add_llvm_tool name)
23 24
24 if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY) 25 if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
25 if( LLVM_BUILD_TOOLS ) 26 if( LLVM_BUILD_TOOLS )
27- set(export_to_llvmexports)
26- if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR 28- if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
27- NOT LLVM_DISTRIBUTION_COMPONENTS) 29- NOT LLVM_DISTRIBUTION_COMPONENTS)
28- set(export_to_llvmexports EXPORT LLVMExports) 30- set(export_to_llvmexports EXPORT LLVMExports)
29- set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True) 31- set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True)
30- endif() 32- endif()
31- 33
32 install(TARGETS ${name} 34 install(TARGETS ${name}
33 ${export_to_llvmexports} 35 ${export_to_llvmexports}
34 RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR} 36@@ -917,9 +911,6 @@ macro(add_llvm_tool name)
35@@ -884,9 +878,6 @@ macro(add_llvm_tool name)
36 endif() 37 endif()
37 endif() 38 endif()
38 endif() 39 endif()
@@ -43,18 +44,19 @@ index 0df6845..b79f4fa 100644
43 endmacro(add_llvm_tool name) 44 endmacro(add_llvm_tool name)
44 45
45diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake 46diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
46index 3c84ae7..141219f 100644 47index 36c026b5c0f..537acd696d8 100644
47--- a/llvm/cmake/modules/TableGen.cmake 48--- a/llvm/cmake/modules/TableGen.cmake
48+++ b/llvm/cmake/modules/TableGen.cmake 49+++ b/llvm/cmake/modules/TableGen.cmake
49@@ -164,14 +164,8 @@ macro(add_tablegen target project) 50@@ -148,15 +148,9 @@ macro(add_tablegen target project)
50 endif() 51 endif()
51 52
52 if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY) 53 if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY AND LLVM_BUILD_UTILS)
54- set(export_to_llvmexports)
53- if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR 55- if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
54- NOT LLVM_DISTRIBUTION_COMPONENTS) 56- NOT LLVM_DISTRIBUTION_COMPONENTS)
55- set(export_to_llvmexports EXPORT LLVMExports) 57- set(export_to_llvmexports EXPORT LLVMExports)
56- endif() 58- endif()
57- 59
58 install(TARGETS ${target} 60 install(TARGETS ${target}
59 ${export_to_llvmexports} 61 ${export_to_llvmexports}
60 RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR}) 62 RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR})
@@ -62,5 +64,5 @@ index 3c84ae7..141219f 100644
62- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${target}) 64- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${target})
63 endmacro() 65 endmacro()
64-- 66--
652.7.4 672.17.1
66 68
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-skip-building-tests.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-skip-building-tests.patch
index c58e7d98..9a7b8445 100644
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-skip-building-tests.patch
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-skip-building-tests.patch
@@ -1,9 +1,9 @@
1From 91db4c3cf7f290a3cab5caa316fc25a60dd409f1 Mon Sep 17 00:00:00 2001 1From 48e50f06b1bbed94cdf5207587161d4bfce7366e Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com> 2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 16 Aug 2019 20:25:16 +0800 3Date: Wed, 21 Aug 2019 14:35:31 +0800
4Subject: [PATCH] llvm-spirv: skip including tests 4Subject: [PATCH] llvm-spirv: skip building tests
5 5
6Some of these need clang to be built and since we're building this in-tree, 6Some of these need clang to be built and since we're building this in-tree,
7that leads to problems when compiling libcxx, compiler-rt which aren't built 7that leads to problems when compiling libcxx, compiler-rt which aren't built
8in-tree. 8in-tree.
9 9
@@ -13,12 +13,13 @@ all components, disable the building of tests altogether.
13Upstream-Status: Inappropriate 13Upstream-Status: Inappropriate
14 14
15Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> 15Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
16Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
16--- 17---
17 CMakeLists.txt | 10 ---------- 18 CMakeLists.txt | 10 ----------
18 1 file changed, 10 deletions(-) 19 1 file changed, 10 deletions(-)
19 20
20diff --git a/CMakeLists.txt b/CMakeLists.txt 21diff --git a/CMakeLists.txt b/CMakeLists.txt
21index d632a50..81ddf62 100644 22index 1208741..20ca3e6 100644
22--- a/CMakeLists.txt 23--- a/CMakeLists.txt
23+++ b/CMakeLists.txt 24+++ b/CMakeLists.txt
24@@ -15,13 +15,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) 25@@ -15,13 +15,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
@@ -32,7 +33,7 @@ index d632a50..81ddf62 100644
32- ) 33- )
33- endif(LLVM_INCLUDE_TESTS) 34- endif(LLVM_INCLUDE_TESTS)
34- 35-
35 find_package(LLVM 8.0.0 REQUIRED 36 find_package(LLVM 9.0.0 REQUIRED
36 COMPONENTS 37 COMPONENTS
37 Analysis 38 Analysis
38@@ -56,9 +49,6 @@ set(LLVM_SPIRV_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) 39@@ -56,9 +49,6 @@ set(LLVM_SPIRV_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include)
@@ -46,5 +47,5 @@ index d632a50..81ddf62 100644
46 install( 47 install(
47 FILES 48 FILES
48-- 49--
492.7.4 502.17.1
50 51
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch
deleted file mode 100644
index 2e935a13..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch
+++ /dev/null
@@ -1,294 +0,0 @@
1From c94ec28600255098ffb9d73d1b386a7c8a535590 Mon Sep 17 00:00:00 2001
2From: Andrew Savonichev <andrew.savonichev@intel.com>
3Date: Thu, 21 Feb 2019 11:02:10 +0000
4Subject: [PATCH 2/2] [OpenCL] Simplify LLVM IR generated for OpenCL blocks
5
6Summary:
7Emit direct call of block invoke functions when possible, i.e. in case the
8block is not passed as a function argument.
9Also doing some refactoring of `CodeGenFunction::EmitBlockCallExpr()`
10
11Reviewers: Anastasia, yaxunl, svenvh
12
13Reviewed By: Anastasia
14
15Subscribers: cfe-commits
16
17Tags: #clang
18
19Differential Revision: https://reviews.llvm.org/D58388
20
21git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354568 91177308-0d34-0410-b5e6-96231b3b80d8
22
23Upstream-Status: Backport
24[https://github.com/llvm-mirror/clang/commit/eae71f8d05ce550c4e2595c9b7082cc2c7882c58]
25Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
26---
27 lib/CodeGen/CGBlocks.cpp | 77 +++++++++++++-------------
28 lib/CodeGen/CGOpenCLRuntime.cpp | 30 +++++++---
29 lib/CodeGen/CGOpenCLRuntime.h | 4 ++
30 test/CodeGenOpenCL/blocks.cl | 10 +---
31 test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 34 +++++++++---
32 5 files changed, 91 insertions(+), 64 deletions(-)
33
34diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
35index fa3c3ee..10a0238 100644
36--- a/lib/CodeGen/CGBlocks.cpp
37+++ b/lib/CodeGen/CGBlocks.cpp
38@@ -1261,52 +1261,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
39 ReturnValueSlot ReturnValue) {
40 const BlockPointerType *BPT =
41 E->getCallee()->getType()->getAs<BlockPointerType>();
42-
43 llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
44-
45- // Get a pointer to the generic block literal.
46- // For OpenCL we generate generic AS void ptr to be able to reuse the same
47- // block definition for blocks with captures generated as private AS local
48- // variables and without captures generated as global AS program scope
49- // variables.
50- unsigned AddrSpace = 0;
51- if (getLangOpts().OpenCL)
52- AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
53-
54- llvm::Type *BlockLiteralTy =
55- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
56-
57- // Bitcast the callee to a block literal.
58- BlockPtr =
59- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
60-
61- // Get the function pointer from the literal.
62- llvm::Value *FuncPtr =
63- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
64- CGM.getLangOpts().OpenCL ? 2 : 3);
65-
66- // Add the block literal.
67+ llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType();
68+ llvm::Value *Func = nullptr;
69+ QualType FnType = BPT->getPointeeType();
70+ ASTContext &Ctx = getContext();
71 CallArgList Args;
72
73- QualType VoidPtrQualTy = getContext().VoidPtrTy;
74- llvm::Type *GenericVoidPtrTy = VoidPtrTy;
75 if (getLangOpts().OpenCL) {
76- GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType();
77- VoidPtrQualTy =
78- getContext().getPointerType(getContext().getAddrSpaceQualType(
79- getContext().VoidTy, LangAS::opencl_generic));
80- }
81-
82- BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy);
83- Args.add(RValue::get(BlockPtr), VoidPtrQualTy);
84-
85- QualType FnType = BPT->getPointeeType();
86+ // For OpenCL, BlockPtr is already casted to generic block literal.
87+
88+ // First argument of a block call is a generic block literal casted to
89+ // generic void pointer, i.e. i8 addrspace(4)*
90+ llvm::Value *BlockDescriptor = Builder.CreatePointerCast(
91+ BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType());
92+ QualType VoidPtrQualTy = Ctx.getPointerType(
93+ Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic));
94+ Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy);
95+ // And the rest of the arguments.
96+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
97+
98+ // We *can* call the block directly unless it is a function argument.
99+ if (!isa<ParmVarDecl>(E->getCalleeDecl()))
100+ Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
101+ else {
102+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2);
103+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
104+ }
105+ } else {
106+ // Bitcast the block literal to a generic block literal.
107+ BlockPtr = Builder.CreatePointerCast(
108+ BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal");
109+ // Get pointer to the block invoke function
110+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3);
111
112- // And the rest of the arguments.
113- EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
114+ // First argument is a block literal casted to a void pointer
115+ BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy);
116+ Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy);
117+ // And the rest of the arguments.
118+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
119
120- // Load the function.
121- llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
122+ // Load the function.
123+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
124+ }
125
126 const FunctionType *FuncTy = FnType->castAs<FunctionType>();
127 const CGFunctionInfo &FnInfo =
128diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
129index 7f6f595..75003e5 100644
130--- a/lib/CodeGen/CGOpenCLRuntime.cpp
131+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
132@@ -123,6 +123,23 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
133 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
134 }
135
136+// Get the block literal from an expression derived from the block expression.
137+// OpenCL v2.0 s6.12.5:
138+// Block variable declarations are implicitly qualified with const. Therefore
139+// all block variables must be initialized at declaration time and may not be
140+// reassigned.
141+static const BlockExpr *getBlockExpr(const Expr *E) {
142+ const Expr *Prev = nullptr; // to make sure we do not stuck in infinite loop.
143+ while(!isa<BlockExpr>(E) && E != Prev) {
144+ Prev = E;
145+ E = E->IgnoreCasts();
146+ if (auto DR = dyn_cast<DeclRefExpr>(E)) {
147+ E = cast<VarDecl>(DR->getDecl())->getInit();
148+ }
149+ }
150+ return cast<BlockExpr>(E);
151+}
152+
153 /// Record emitted llvm invoke function and llvm block literal for the
154 /// corresponding block expression.
155 void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
156@@ -137,20 +154,17 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
157 EnqueuedBlockMap[E].Kernel = nullptr;
158 }
159
160+llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
161+ return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
162+}
163+
164 CGOpenCLRuntime::EnqueuedBlockInfo
165 CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
166 CGF.EmitScalarExpr(E);
167
168 // The block literal may be assigned to a const variable. Chasing down
169 // to get the block literal.
170- if (auto DR = dyn_cast<DeclRefExpr>(E)) {
171- E = cast<VarDecl>(DR->getDecl())->getInit();
172- }
173- E = E->IgnoreImplicit();
174- if (auto Cast = dyn_cast<CastExpr>(E)) {
175- E = Cast->getSubExpr();
176- }
177- auto *Block = cast<BlockExpr>(E);
178+ const BlockExpr *Block = getBlockExpr(E);
179
180 assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
181 "Block expression not emitted");
182diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h
183index 750721f..4effc7e 100644
184--- a/lib/CodeGen/CGOpenCLRuntime.h
185+++ b/lib/CodeGen/CGOpenCLRuntime.h
186@@ -92,6 +92,10 @@ public:
187 /// \param Block block literal emitted for the block expression.
188 void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
189 llvm::Value *Block);
190+
191+ /// \return LLVM block invoke function emitted for an expression derived from
192+ /// the block expression.
193+ llvm::Function *getInvokeFunction(const Expr *E);
194 };
195
196 }
197diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
198index 19aacc3..ab5a2c6 100644
199--- a/test/CodeGenOpenCL/blocks.cl
200+++ b/test/CodeGenOpenCL/blocks.cl
201@@ -39,11 +39,8 @@ void foo(){
202 // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
203 // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
204 // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
205- // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
206 // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
207- // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
208- // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
209- // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
210+ // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
211 // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
212 // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
213 // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
214@@ -53,11 +50,8 @@ void foo(){
215 // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
216 // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
217 // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
218- // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
219 // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
220- // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
221- // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)*
222- // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]])
223+ // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
224
225 int (^ block_B)(void) = ^{
226 return i;
227diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
228index 8445016..1566912 100644
229--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
230+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
231@@ -312,9 +312,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
232 };
233
234 // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
235- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
236- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
237- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
238+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
239 block_A();
240
241 // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
242@@ -333,15 +331,35 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
243 unsigned size = get_kernel_work_group_size(block_A);
244
245 // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
246- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
247- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
248- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
249+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
250 block_A();
251
252+ // Make sure that block invoke function is resolved correctly after sequence of assignements.
253+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
254+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
255+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
256+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
257+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1,
258+ bl_t b1 = block_G;
259+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
260+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
261+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
262+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
263+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2,
264+ bl_t b2 = b1;
265+ // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
266+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*)
267+ // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
268+ b2(0);
269+ // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
270+ // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
271+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
272+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
273+ size = get_kernel_preferred_work_group_size_multiple(b2);
274+
275 void (^block_C)(void) = ^{
276 callee(i, a);
277 };
278-
279 // Emits block literal on stack and block kernel [[INVLK3]].
280 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
281 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
282@@ -404,8 +422,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
283 // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
284 // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
285 // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
286+// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
287 // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
288 // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
289-// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
290 // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
291 // COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
292--
2931.8.3.1
294
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-Remove-extra-semicolon.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-Remove-extra-semicolon.patch
deleted file mode 100644
index 5f5a957e..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-Remove-extra-semicolon.patch
+++ /dev/null
@@ -1,27 +0,0 @@
1From a2c093c8db7b4e3a5612d0fcce9e4fd1756d6e4b Mon Sep 17 00:00:00 2001
2From: Alexey Sotkin <alexey.sotkin@intel.com>
3Date: Mon, 5 Aug 2019 18:18:01 +0300
4Subject: [PATCH] Remove extra semicolon
5
6Upstream-Status: Backport
7Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
8---
9 lib/SPIRV/libSPIRV/SPIRVEnum.h | 2 +-
10 1 file changed, 1 insertion(+), 1 deletion(-)
11
12diff --git a/lib/SPIRV/libSPIRV/SPIRVEnum.h b/lib/SPIRV/libSPIRV/SPIRVEnum.h
13index c93a484..3a071e7 100644
14--- a/lib/SPIRV/libSPIRV/SPIRVEnum.h
15+++ b/lib/SPIRV/libSPIRV/SPIRVEnum.h
16@@ -124,7 +124,7 @@ template <> inline void SPIRVMap<SPIRVExtensionKind, std::string>::init() {
17 add(SPV_INTEL_device_side_avc_motion_estimation,
18 "SPV_INTEL_device_side_avc_motion_estimation");
19 add(SPV_KHR_no_integer_wrap_decoration, "SPV_KHR_no_integer_wrap_decoration");
20-};
21+}
22
23 template <> inline void SPIRVMap<SPIRVExtInstSetKind, std::string>::init() {
24 add(SPIRVEIS_OpenCL, "OpenCL.std");
25--
262.7.4
27
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch
deleted file mode 100644
index 510c7c6e..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch
+++ /dev/null
@@ -1,61 +0,0 @@
1From 29e2813a2ab7d5569860bb07892dfef7b5374d96 Mon Sep 17 00:00:00 2001
2From: Yaxun Liu <Yaxun.Liu@amd.com>
3Date: Tue, 26 Feb 2019 16:20:41 +0000
4Subject: [PATCH] [OpenCL] Fix assertion due to blocks
5
6A recent change caused assertion in CodeGenFunction::EmitBlockCallExpr when a block is called.
7
8There is code
9
10 Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
11getCalleeDecl calls Expr::getReferencedDeclOfCallee, which does not handle
12BlockExpr and returns nullptr, which causes isa to assert.
13
14This patch fixes that.
15
16Differential Revision: https://reviews.llvm.org/D58658
17
18
19git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354893 91177308-0d34-0410-b5e6-96231b3b80d8
20
21Upstream-Status: Backport
22[https://github.com/llvm-mirror/clang/commit/29e2813a2ab7d5569860bb07892dfef7b5374d96]
23Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
24---
25 lib/AST/Expr.cpp | 2 ++
26 test/CodeGenOpenCL/blocks.cl | 6 ++++++
27 2 files changed, 8 insertions(+)
28
29diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
30index aef1eab..85690c7 100644
31--- a/lib/AST/Expr.cpp
32+++ b/lib/AST/Expr.cpp
33@@ -1358,6 +1358,8 @@ Decl *Expr::getReferencedDeclOfCallee() {
34 return DRE->getDecl();
35 if (MemberExpr *ME = dyn_cast<MemberExpr>(CEE))
36 return ME->getMemberDecl();
37+ if (auto *BE = dyn_cast<BlockExpr>(CEE))
38+ return BE->getBlockDecl();
39
40 return nullptr;
41 }
42diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
43index ab5a2c6..c3e2685 100644
44--- a/test/CodeGenOpenCL/blocks.cl
45+++ b/test/CodeGenOpenCL/blocks.cl
46@@ -90,6 +90,12 @@ int get42() {
47 return blockArgFunc(^{return 42;});
48 }
49
50+// COMMON-LABEL: define {{.*}}@call_block
51+// call {{.*}}@__call_block_block_invoke
52+int call_block() {
53+ return ^int(int num) { return num; } (11);
54+}
55+
56 // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size"
57 // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align"
58
59--
601.8.3.1
61
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch
new file mode 100644
index 00000000..cd519971
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch
@@ -0,0 +1,111 @@
1From eeb816d95f0910bd246e37bb2bb3923acf0edf6b Mon Sep 17 00:00:00 2001
2From: Aleksander Us <aleksander.us@intel.com>
3Date: Mon, 26 Aug 2019 15:47:41 +0300
4Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in
5 SplitBlockPredecessors.
6
7In case when BB is header of some loop and predecessor is latch of
8this loop, metadata was not attached to newly created basic block.
9This led to loss of loop metadata for other passes.
10
11Upstream-Status: Submitted [https://reviews.llvm.org/D66892]
12
13https://github.com/intel/llvm-patches/commit/8af4449e2d201707f7f2f832b473a0439e255f32
14
15Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
16---
17 lib/Transforms/Utils/BasicBlockUtils.cpp | 23 ++++++++----
18 test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++
19 2 files changed, 52 insertions(+), 7 deletions(-)
20 create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll
21
22diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
23index 5fa371377c8..3a90ae061fb 100644
24--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
25+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
26@@ -579,24 +579,33 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
27
28 // The new block unconditionally branches to the old block.
29 BranchInst *BI = BranchInst::Create(BB, NewBB);
30+ bool IsBBHeader = LI && LI->isLoopHeader(BB);
31+ Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr;
32 // Splitting the predecessors of a loop header creates a preheader block.
33- if (LI && LI->isLoopHeader(BB))
34+ if (IsBBHeader)
35 // Using the loop start line number prevents debuggers stepping into the
36 // loop body for this instruction.
37- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
38+ BI->setDebugLoc(BBLoop->getStartLoc());
39 else
40 BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
41
42 // Move the edges from Preds to point to NewBB instead of BB.
43- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
44+ for (BasicBlock *Pred : Preds) {
45+ Instruction *PI = Pred->getTerminator();
46 // This is slightly more strict than necessary; the minimum requirement
47 // is that there be no more than one indirectbr branching to BB. And
48 // all BlockAddress uses would need to be updated.
49- assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
50+ assert(!isa<IndirectBrInst>(PI) &&
51 "Cannot split an edge from an IndirectBrInst");
52- assert(!isa<CallBrInst>(Preds[i]->getTerminator()) &&
53- "Cannot split an edge from a CallBrInst");
54- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
55+ assert(!isa<CallBrInst>(PI) && "Cannot split an edge from a CallBrInst");
56+ if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) {
57+ // Update loop metadata if it exists.
58+ if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) {
59+ BI->setMetadata(LLVMContext::MD_loop, LoopMD);
60+ PI->setMetadata(LLVMContext::MD_loop, nullptr);
61+ }
62+ }
63+ PI->replaceUsesOfWith(BB, NewBB);
64 }
65
66 // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
67diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll
68new file mode 100644
69index 00000000000..c15c92fe3ae
70--- /dev/null
71+++ b/test/Transforms/LoopSimplify/loop_metadata.ll
72@@ -0,0 +1,36 @@
73+; RUN: opt -S -loop-simplify < %s | FileCheck %s
74+
75+; CHECK: for.cond.loopexit:
76+; CHECK: br label %for.cond, !llvm.loop !0
77+; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit
78+
79+define void @foo() {
80+entry:
81+ br label %for.cond
82+
83+for.cond: ; preds = %for.cond1, %entry
84+ %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ]
85+ %cmp = icmp ult i32 %j, 8
86+ br i1 %cmp, label %for.body, label %for.end
87+
88+for.body: ; preds = %for.cond
89+ %dummy1 = add i32 1, 1
90+ %add = add nuw nsw i32 %j, 1
91+ br label %for.cond1
92+
93+for.cond1: ; preds = %for.body1, %for.body
94+ %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ]
95+ %cmp1 = icmp ult i32 %i.0, 8
96+ br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0
97+
98+for.body1: ; preds = %for.cond1
99+ %dummy2 = add i32 1, 1
100+ %inc = add nuw nsw i32 %i.0, 1
101+ br label %for.cond1
102+
103+for.end: ; preds = %for.cond
104+ ret void
105+}
106+
107+!0 = distinct !{!0, !1}
108+!1 = !{!"llvm.loop.unroll.full"}
109--
1102.18.0
111
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch
new file mode 100644
index 00000000..48307deb
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch
@@ -0,0 +1,146 @@
1From 35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001
2From: Aleksander Us <aleksander.us@intel.com>
3Date: Mon, 26 Aug 2019 15:45:47 +0300
4Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in
5 LFTR when possible.
6
7SCEV analysis cannot properly cache instruction with poison flags
8(for example, add nsw outside of loop will not be reused by expander).
9This can lead to generating of additional instructions by SCEV expander.
10
11Example IR:
12
13 ...
14 %maxval = add nuw nsw i32 %a1, %a2
15 ...
16for.body:
17 ...
18 %cmp22 = icmp ult i32 %ivadd, %maxval
19 br i1 %cmp22, label %for.body, label %for.end
20 ...
21
22SCEV expander will generate copy of %maxval in preheader but without
23nuw/nsw flags. This can be avoided by explicit check that iv count
24value gives the same SCEV expressions as calculated by LFTR.
25
26Upstream-Status: Submitted [https://reviews.llvm.org/D66890]
27
28https://github.com/intel/llvm-patches/commit/fd6a6c97341a56fd21bc32bc940afea751312e8f
29
30Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
31---
32 lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++-
33 test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++
34 test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++-----
35 test/Transforms/IndVarSimplify/udiv.ll | 1 +
36 4 files changed, 38 insertions(+), 7 deletions(-)
37 create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll
38
39diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
40index f9fc698a4a9..5e04dac8aa6 100644
41--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
42+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
43@@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
44 if (UsePostInc)
45 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
46
47+ // If computed limit is equal to old limit then do not use SCEV expander
48+ // because it can lost NUW/NSW flags and create extra instructions.
49+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
50+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getOperand(0))) {
51+ Value *Limit = Cmp->getOperand(0);
52+ if (!L->isLoopInvariant(Limit))
53+ Limit = Cmp->getOperand(1);
54+ if (SE->getSCEV(Limit) == IVLimit)
55+ return Limit;
56+ }
57+
58 // Expand the code for the iteration count.
59 assert(SE->isLoopInvariant(IVLimit, L) &&
60 "Computed iteration count is not loop invariant!");
61@@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
62 // SCEV expression (IVInit) for a pointer type IV value (IndVar).
63 Type *LimitTy = ExitCount->getType()->isPointerTy() ?
64 IndVar->getType() : ExitCount->getType();
65- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
66 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
67 }
68 }
69diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll
70new file mode 100644
71index 00000000000..abd1cbb6c51
72--- /dev/null
73+++ b/test/Transforms/IndVarSimplify/add_nsw.ll
74@@ -0,0 +1,23 @@
75+; RUN: opt -indvars -S %s | FileCheck %s
76+
77+target datalayout = "e-p:32:32-i64:64-n8:16:32"
78+
79+; CHECK: for.body.preheader:
80+; CHECK-NOT: add
81+; CHECK: for.body:
82+
83+define void @foo(i32 %a1, i32 %a2) {
84+entry:
85+ %maxval = add nuw nsw i32 %a1, %a2
86+ %cmp = icmp slt i32 %maxval, 1
87+ br i1 %cmp, label %for.end, label %for.body
88+
89+for.body: ; preds = %entry, %for.body
90+ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ]
91+ %add31 = add nuw nsw i32 %j.02, 1
92+ %cmp22 = icmp slt i32 %add31, %maxval
93+ br i1 %cmp22, label %for.body, label %for.end
94+
95+for.end: ; preds = %for.body
96+ ret void
97+}
98diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
99index 14ae9738696..509d662b767 100644
100--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
101+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
102@@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind {
103 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
104 ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
105 ; CHECK: outer.preheader:
106-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1
107 ; CHECK-NEXT: br label [[OUTER:%.*]]
108 ; CHECK: outer:
109-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ]
110-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ]
111+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ]
112 ; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
113 ; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
114 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
115@@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind {
116 ; CHECK: inner:
117 ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ]
118 ; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1
119-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]]
120+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]]
121 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
122 ; CHECK: outer.inc.loopexit:
123 ; CHECK-NEXT: br label [[OUTER_INC]]
124 ; CHECK: outer.inc:
125 ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1
126-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1
127-; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]]
128+; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]]
129 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]]
130 ; CHECK: exit.loopexit:
131 ; CHECK-NEXT: br label [[EXIT]]
132diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll
133index b3f2c2a6a66..3530343ef4a 100644
134--- a/test/Transforms/IndVarSimplify/udiv.ll
135+++ b/test/Transforms/IndVarSimplify/udiv.ll
136@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
137 ; CHECK-LABEL: @foo(
138 ; CHECK: for.body.preheader:
139 ; CHECK-NOT: udiv
140+; CHECK: for.body:
141
142 define void @foo(double* %p, i64 %n) nounwind {
143 entry:
144--
1452.18.0
146
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
index 50ab25e0..8a2cc37f 100644
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
@@ -1,14 +1,11 @@
1FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:" 1FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:"
2 2
3SRC_URI_append_intel-x86-common = " \ 3SRC_URI_append_intel-x86-common = " \
4 file://0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch;patchdir=clang \
5 file://0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch;patchdir=clang \
6 file://0003-OpenCL-Fix-assertion-due-to-blocks.patch;patchdir=clang \
7 file://0001-dont-export-targets-for-binaries.patch \ 4 file://0001-dont-export-targets-for-binaries.patch \
8 git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_80;destsuffix=git/llvm/projects/llvm-spirv;name=spirv \ 5 file://BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch;patchdir=llvm \
9 file://0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch;patchdir=llvm/projects/llvm-spirv \ 6 file://IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch;patchdir=llvm \
10 file://0002-Remove-extra-semicolon.patch;patchdir=llvm/projects/llvm-spirv \ 7 git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_90;destsuffix=git/llvm/projects/llvm-spirv;name=spirv \
11 file://0001-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \ 8 file://0001-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \
12 " 9 "
13 10
14SRCREV_spirv = "1d48cd84d04a2f60b43ea3f66eb7c86f4e5973a9" 11SRCREV_spirv = "70420631144a6a25613ae37178f2cc1d3607b65b"