summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnuj Mittal <anuj.mittal@intel.com>2019-03-26 06:08:36 (GMT)
committerAnuj Mittal <anuj.mittal@intel.com>2019-05-17 11:33:24 (GMT)
commita12a2f03466cdc1d639f3d10a94fc22e0609c67c (patch)
tree8feaa49cb005541af143a602ba40bd153e6bf6c2
parent613bc1ffc315ac940dc99071bcf13565cd9a40ce (diff)
downloadmeta-intel-a12a2f03466cdc1d639f3d10a94fc22e0609c67c.tar.gz
clang: build Intel common-clang and spirv
Common clang is a thin wrapper library around clang. Common clang has OpenCL-oriented API and is capable to compile OpenCL C kernels to SPIR-V modules. This adds a bbappend to clang recipe from meta-clang to build the necessary components and moves it to dynamic layers so it's built only when clang-layer is included. Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
-rw-r--r--conf/layer.conf4
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend13
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch156
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch1119
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch66
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch59
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch294
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch61
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend16
9 files changed, 1788 insertions, 0 deletions
diff --git a/conf/layer.conf b/conf/layer.conf
index 3e75b0f..4cea550 100644
--- a/conf/layer.conf
+++ b/conf/layer.conf
@@ -20,5 +20,9 @@ LAYERRECOMMENDS_intel = "dpdk intel-qat"
20LAYERVERSION_intel = "5" 20LAYERVERSION_intel = "5"
21LAYERSERIES_COMPAT_intel = "thud warrior" 21LAYERSERIES_COMPAT_intel = "thud warrior"
22 22
23BBFILES_DYNAMIC += " \
24 clang-layer:${LAYERDIR}/dynamic-layers/clang-layer/*/*/*.bb \
25 clang-layer:${LAYERDIR}/dynamic-layers/clang-layer/*/*/*.bbappend \
26"
23 27
24require ${LAYERDIR}/conf/include/maintainers.inc 28require ${LAYERDIR}/conf/include/maintainers.inc
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend
new file mode 100644
index 0000000..f8d5a25
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend
@@ -0,0 +1,13 @@
1FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:"
2
3DEPENDS_append = " opencl-clang-native"
4LLVM_TARGETS_TO_BUILD = "X86"
5
6do_install_append_intel-x86-common() {
7 DESTDIR=${D} ninja -v install-cmake-exports
8}
9
10LIBCPLUSPLUS = ""
11
12# undefined reference to `__atomic_load' on i*86.
13COMPATIBLE_HOST = '(x86_64).*-linux'
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch
new file mode 100644
index 0000000..1c49140
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch
@@ -0,0 +1,156 @@
1From 39a3ac0065c23d1e2d55dfd8792cc28a146a4307 Mon Sep 17 00:00:00 2001
2From: Alexey Bader <alexey.bader@intel.com>
3Date: Tue, 19 Feb 2019 15:19:06 +0000
4Subject: [PATCH 1/2] [OpenCL] Change type of block pointer for OpenCL
5
6Summary:
7
8For some reason OpenCL blocks in LLVM IR are represented as function pointers.
9These pointers do not point to any real function and never get called. Actually
10they point to some structure, which in turn contains pointer to the real block
11invoke function.
12This patch changes represntation of OpenCL blocks in LLVM IR from function
13pointers to pointers to `%struct.__block_literal_generic`.
14Such representation allows to avoid unnecessary bitcasts and simplifies
15further processing (e.g. translation to SPIR-V ) of the module for targets
16which do not support function pointers.
17
18Patch by: Alexey Sotkin.
19
20Reviewers: Anastasia, yaxunl, svenvh
21
22Reviewed By: Anastasia
23
24Subscribers: alexbatashev, cfe-commits
25
26Tags: #clang
27
28Differential Revision: https://reviews.llvm.org/D58277
29
30git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354337 91177308-0d34-0410-b5e6-96231b3b80d8
31
32Upstream-Status: Backport
33[https://github.com/llvm-mirror/clang/commit/283f308bdb5893bab1f36791711346e746045f94]
34Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
35---
36 lib/CodeGen/CodeGenTypes.cpp | 4 +++-
37 test/CodeGenOpenCL/blocks.cl | 18 ++++++++----------
38 test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 18 +++++++++---------
39 3 files changed, 20 insertions(+), 20 deletions(-)
40
41diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
42index 2acf1ac..93b3ebf 100644
43--- a/lib/CodeGen/CodeGenTypes.cpp
44+++ b/lib/CodeGen/CodeGenTypes.cpp
45@@ -637,7 +637,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
46
47 case Type::BlockPointer: {
48 const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType();
49- llvm::Type *PointeeType = ConvertTypeForMem(FTy);
50+ llvm::Type *PointeeType = CGM.getLangOpts().OpenCL
51+ ? CGM.getGenericBlockLiteralType()
52+ : ConvertTypeForMem(FTy);
53 unsigned AS = Context.getTargetAddressSpace(FTy);
54 ResultType = llvm::PointerType::get(PointeeType, AS);
55 break;
56diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
57index 675240c..19aacc3 100644
58--- a/test/CodeGenOpenCL/blocks.cl
59+++ b/test/CodeGenOpenCL/blocks.cl
60@@ -35,11 +35,10 @@ void foo(){
61 // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
62 // SPIR: %[[i_value:.*]] = load i32, i32* %i
63 // SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
64- // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
65- // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
66- // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
67- // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
68- // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
69+ // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic*
70+ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
71+ // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
72+ // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
73 // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
74 // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
75 // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
76@@ -50,11 +49,10 @@ void foo(){
77 // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
78 // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
79 // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
80- // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
81- // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
82- // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
83- // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
84- // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic*
85+ // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)*
86+ // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
87+ // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
88+ // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
89 // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
90 // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
91 // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
92diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
93index 4732194..8445016 100644
94--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
95+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
96@@ -11,7 +11,7 @@ typedef struct {int a;} ndrange_t;
97
98 // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
99 // COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
100-// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
101+// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*)
102
103 // For anonymous blocks without captures, emit block literals as global variable.
104 // COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
105@@ -77,9 +77,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
106 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
107 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
108 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
109- // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
110- // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
111- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
112+ // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic*
113+ // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic*
114+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
115 // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
116 // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
117 // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
118@@ -95,8 +95,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
119 // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
120 // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
121 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
122- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
123- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
124+ // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic*
125+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
126 // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
127 // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
128 // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
129@@ -300,13 +300,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
130 // Emits global block literal [[BLG8]] and invoke function [[INVG8]].
131 // The full type of these expressions are long (and repeated elsewhere), so we
132 // capture it as part of the regex for convenience and clarity.
133- // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
134+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A
135 void (^const block_A)(void) = ^{
136 return;
137 };
138
139 // Emits global block literal [[BLG9]] and invoke function [[INVG9]].
140- // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
141+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B
142 void (^const block_B)(local void *) = ^(local void *a) {
143 return;
144 };
145@@ -346,7 +346,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
146 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
147 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
148 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
149- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
150+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)*
151 // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
152 // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
153 // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
154--
1551.8.3.1
156
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
new file mode 100644
index 0000000..4a52867
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
@@ -0,0 +1,1119 @@
1From 9ce0fe02fd6cda5fb29fbb0d5037a1798a810b8a Mon Sep 17 00:00:00 2001
2From: Alexey Sotkin <alexey.sotkin@intel.com>
3Date: Thu, 21 Feb 2019 17:14:36 +0300
4Subject: [PATCH 1/3] Update LowerOpenCL pass to handle new blocks
5 represntation in LLVM IR
6
7Upstream-Status: Backport
8[https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/bd6ddfaf7232cd81c7f2fe9877e66f286731bd8e]
9Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
10---
11 lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 413 ++++++++----------------------
12 test/global_block.ll | 71 ++---
13 test/literal-struct.ll | 31 ++-
14 test/transcoding/block_w_struct_return.ll | 47 ++--
15 test/transcoding/enqueue_kernel.ll | 237 ++++++++++-------
16 5 files changed, 317 insertions(+), 482 deletions(-)
17
18diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
19index 50e1838..b42a4ec 100644
20--- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
21+++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
22@@ -1,303 +1,110 @@
23-//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===//
24-//
25-// The LLVM/SPIRV Translator
26-//
27-// This file is distributed under the University of Illinois Open Source
28-// License. See LICENSE.TXT for details.
29-//
30-// Copyright (c) 2018 Intel Corporation. All rights reserved.
31-//
32-// Permission is hereby granted, free of charge, to any person obtaining a
33-// copy of this software and associated documentation files (the "Software"),
34-// to deal with the Software without restriction, including without limitation
35-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
36-// and/or sell copies of the Software, and to permit persons to whom the
37-// Software is furnished to do so, subject to the following conditions:
38-//
39-// Redistributions of source code must retain the above copyright notice,
40-// this list of conditions and the following disclaimers.
41-// Redistributions in binary form must reproduce the above copyright notice,
42-// this list of conditions and the following disclaimers in the documentation
43-// and/or other materials provided with the distribution.
44-// Neither the names of Intel Corporation, nor the names of its
45-// contributors may be used to endorse or promote products derived from this
46-// Software without specific prior written permission.
47-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
48-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
49-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
50-// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
51-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
52-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
53-// THE SOFTWARE.
54-//
55-//===----------------------------------------------------------------------===//
56-//
57-// SPIR-V specification doesn't allow function pointers, so SPIR-V translator
58-// is designed to fail if a value with function type (except calls) is occured.
59-// Currently there is only two cases, when function pointers are generating in
60-// LLVM IR in OpenCL - block calls and device side enqueue built-in calls.
61-//
62-// In both cases values with function type used as intermediate representation
63-// for block literal structure.
64-//
65-// This pass is designed to find such cases and simplify them to avoid any
66-// function pointer types occurrences in LLVM IR in 4 steps.
67-//
68-// 1. Find all function pointer allocas, like
69-// %block = alloca void () *
70-//
71-// Then find a single store to that alloca:
72-// %blockLit = alloca <{ i32, i32, ...}>, align 4
73-// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()*
74-// > store void ()* %0, void ()** %block, align 4
75-//
76-// And replace the alloca users by new instructions which used stored value
77-// %blockLit itself instead of function pointer alloca %block.
78-//
79-// 2. Find consecutive casts from block literal type to i8 addrspace(4)*
80-// used function pointers as an intermediate type:
81-// %0 = bitcast <{ i32, i32 }> %block to void() *
82-// %1 = addrspacecast void() * %0 to i8 addrspace(4)*
83-// And simplify them:
84-// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)*
85-//
86-// 3. Find all unused instructions with function pointer type occured after
87-// pp.1-2 and remove them.
88-//
89-// 4. Find unused globals with function pointer type, like
90-// @block = constant void ()*
91-// bitcast ({ i32, i32 }* @__block_literal_global to void ()*
92-//
93-// And remove them.
94-//
95-//===----------------------------------------------------------------------===//
96-#define DEBUG_TYPE "spv-lower-ocl-blocks"
97-
98-#include "OCLUtil.h"
99-#include "SPIRVInternal.h"
100-
101-#include "llvm/ADT/SetVector.h"
102-#include "llvm/Analysis/ValueTracking.h"
103-#include "llvm/IR/GlobalVariable.h"
104-#include "llvm/IR/InstIterator.h"
105-#include "llvm/IR/Module.h"
106-#include "llvm/Pass.h"
107-#include "llvm/PassSupport.h"
108-#include "llvm/Support/Casting.h"
109-
110-using namespace llvm;
111-
112-namespace {
113-
114-static void
115-removeUnusedFunctionPtrInst(Instruction *I,
116- SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
117- for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) {
118- Instruction *OpI = dyn_cast<Instruction>(I->getOperand(OpIdx));
119- I->setOperand(OpIdx, nullptr);
120- if (OpI && OpI != I && OpI->user_empty())
121- FuncPtrInsts.insert(OpI);
122- }
123- I->eraseFromParent();
124-}
125-
126-static bool isFuncPtrAlloca(const AllocaInst *AI) {
127- auto *ET = dyn_cast<PointerType>(AI->getAllocatedType());
128- return ET && ET->getElementType()->isFunctionTy();
129-}
130-
131-static bool hasFuncPtrType(const Value *V) {
132- auto *PT = dyn_cast<PointerType>(V->getType());
133- return PT && PT->getElementType()->isFunctionTy();
134-}
135-
136-static bool isFuncPtrInst(const Instruction *I) {
137- if (auto *AI = dyn_cast<AllocaInst>(I))
138- return isFuncPtrAlloca(AI);
139-
140- for (auto &Op : I->operands()) {
141- if (auto *AI = dyn_cast<AllocaInst>(Op))
142- return isFuncPtrAlloca(AI);
143-
144- auto *OpI = dyn_cast<Instruction>(&Op);
145- if (OpI && OpI != I && hasFuncPtrType(OpI))
146- return true;
147- }
148- return false;
149-}
150-
151-static StoreInst *findSingleStore(AllocaInst *AI) {
152- StoreInst *Store = nullptr;
153- for (auto *U : AI->users()) {
154- if (!isa<StoreInst>(U))
155- continue; // not a store
156- if (Store)
157- return nullptr; // there are more than one stores
158- Store = dyn_cast<StoreInst>(U);
159- }
160- return Store;
161-}
162-
163-static void fixFunctionPtrAllocaUsers(AllocaInst *AI) {
164- // Find and remove a single store to alloca
165- auto *SingleStore = findSingleStore(AI);
166- assert(SingleStore && "More than one store to the function pointer alloca");
167- auto *StoredVal = SingleStore->getValueOperand();
168- SingleStore->eraseFromParent();
169-
170- // Find loads from the alloca and replace thier users
171- for (auto *U : AI->users()) {
172- auto *LI = dyn_cast<LoadInst>(U);
173- if (!LI)
174- continue;
175-
176- for (auto *U : LI->users()) {
177- auto *UInst = cast<Instruction>(U);
178- auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast(
179- StoredVal, UInst->getType(), "", UInst);
180- UInst->replaceAllUsesWith(Cast);
181- }
182- }
183-}
184-
185-static int getBlockLiteralIdx(const Function &F) {
186- StringRef FName = F.getName();
187- if (isEnqueueKernelBI(FName))
188- return FName.contains("events") ? 7 : 4;
189- if (isKernelQueryBI(FName))
190- return FName.contains("for_ndrange") ? 2 : 1;
191- if (FName.startswith("__") && FName.contains("_block_invoke"))
192- return F.hasStructRetAttr() ? 1 : 0;
193-
194- return -1; // No block literal argument
195-}
196-
197-static bool hasBlockLiteralArg(const Function &F) {
198- return getBlockLiteralIdx(F) != -1;
199-}
200-
201-static bool simplifyFunctionPtrCasts(Function &F) {
202- bool Changed = false;
203- int BlockLiteralIdx = getBlockLiteralIdx(F);
204- for (auto *U : F.users()) {
205- auto *Call = dyn_cast<CallInst>(U);
206- if (!Call)
207- continue;
208- if (Call->getFunction()->getName() == F.getName().str() + "_kernel")
209- continue; // Skip block invoke function calls inside block invoke kernels
210-
211- const DataLayout &DL = F.getParent()->getDataLayout();
212- auto *BlockLiteral = Call->getOperand(BlockLiteralIdx);
213- auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL);
214- if (isa<GlobalVariable>(BlockLiteralVal))
215- continue; // nothing to do with globals
216-
217- auto *BlockLiteralAlloca = cast<AllocaInst>(BlockLiteralVal);
218- assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() &&
219- "Function type shouldn't be there");
220-
221- auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast(
222- BlockLiteralAlloca, BlockLiteral->getType(), "", Call);
223- BlockLiteral->replaceAllUsesWith(NewBlockLiteral);
224- Changed |= true;
225- }
226- return Changed;
227-}
228-
229-static void
230-findFunctionPtrAllocas(Module &M,
231- SmallVectorImpl<AllocaInst *> &FuncPtrAllocas) {
232- for (auto &F : M) {
233- if (F.isDeclaration())
234- continue;
235- for (auto &I : instructions(F)) {
236- auto *AI = dyn_cast<AllocaInst>(&I);
237- if (!AI || !isFuncPtrAlloca(AI))
238- continue;
239- FuncPtrAllocas.push_back(AI);
240- }
241- }
242-}
243-
244-static void
245-findUnusedFunctionPtrInsts(Module &M,
246- SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
247- for (auto &F : M) {
248- if (F.isDeclaration())
249- continue;
250- for (auto &I : instructions(F))
251- if (I.user_empty() && isFuncPtrInst(&I))
252- FuncPtrInsts.insert(&I);
253- }
254-}
255-
256-static void
257-findUnusedFunctionPtrGlbs(Module &M,
258- SmallVectorImpl<GlobalVariable *> &FuncPtrGlbs) {
259- for (auto &GV : M.globals()) {
260- if (!GV.user_empty())
261- continue;
262- auto *GVType = dyn_cast<PointerType>(GV.getType()->getElementType());
263- if (GVType && GVType->getElementType()->isFunctionTy())
264- FuncPtrGlbs.push_back(&GV);
265- }
266-}
267-
268-class SPIRVLowerOCLBlocks : public ModulePass {
269-
270-public:
271- SPIRVLowerOCLBlocks() : ModulePass(ID) {}
272-
273- bool runOnModule(Module &M) {
274- bool Changed = false;
275-
276- // 1. Find function pointer allocas and fix their users
277- SmallVector<AllocaInst *, 16> FuncPtrAllocas;
278- findFunctionPtrAllocas(M, FuncPtrAllocas);
279-
280- Changed |= !FuncPtrAllocas.empty();
281- for (auto *AI : FuncPtrAllocas)
282- fixFunctionPtrAllocaUsers(AI);
283-
284- // 2. Simplify consecutive casts which use function pointer types
285- for (auto &F : M)
286- if (hasBlockLiteralArg(F))
287- Changed |= simplifyFunctionPtrCasts(F);
288-
289- // 3. Cleanup unused instructions with function pointer type
290- // which are occured after pp. 1-2
291- SmallSetVector<Instruction *, 16> FuncPtrInsts;
292- findUnusedFunctionPtrInsts(M, FuncPtrInsts);
293-
294- Changed |= !FuncPtrInsts.empty();
295- while (!FuncPtrInsts.empty()) {
296- Instruction *I = FuncPtrInsts.pop_back_val();
297- removeUnusedFunctionPtrInst(I, FuncPtrInsts);
298- }
299-
300- // 4. Find and remove unused global variables with function pointer type
301- SmallVector<GlobalVariable *, 16> FuncPtrGlbs;
302- findUnusedFunctionPtrGlbs(M, FuncPtrGlbs);
303-
304- Changed |= !FuncPtrGlbs.empty();
305- for (auto *GV : FuncPtrGlbs)
306- GV->eraseFromParent();
307-
308- return Changed;
309- }
310-
311- static char ID;
312-}; // class SPIRVLowerOCLBlocks
313-
314-char SPIRVLowerOCLBlocks::ID = 0;
315-
316-} // namespace
317-
318-INITIALIZE_PASS(
319- SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks",
320- "Remove function pointers occured in case of using OpenCL blocks", false,
321- false)
322-
323-llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() {
324- return new SPIRVLowerOCLBlocks();
325-}
326+//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===//
327+//
328+// The LLVM/SPIRV Translator
329+//
330+// This file is distributed under the University of Illinois Open Source
331+// License. See LICENSE.TXT for details.
332+//
333+// Copyright (c) 2018 Intel Corporation. All rights reserved.
334+//
335+// Permission is hereby granted, free of charge, to any person obtaining a
336+// copy of this software and associated documentation files (the "Software"),
337+// to deal with the Software without restriction, including without limitation
338+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
339+// and/or sell copies of the Software, and to permit persons to whom the
340+// Software is furnished to do so, subject to the following conditions:
341+//
342+// Redistributions of source code must retain the above copyright notice,
343+// this list of conditions and the following disclaimers.
344+// Redistributions in binary form must reproduce the above copyright notice,
345+// this list of conditions and the following disclaimers in the documentation
346+// and/or other materials provided with the distribution.
347+// Neither the names of Intel Corporation, nor the names of its
348+// contributors may be used to endorse or promote products derived from this
349+// Software without specific prior written permission.
350+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
351+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
352+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
353+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
354+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
355+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
356+// THE SOFTWARE.
357+//
358+//===----------------------------------------------------------------------===//
359+//
360+// SPIR-V specification doesn't allow function pointers, so SPIR-V translator
361+// is designed to fail if a value with function type (except calls) is occured.
362+// Currently there is only two cases, when function pointers are generating in
363+// LLVM IR in OpenCL - block calls and device side enqueue built-in calls.
364+//
365+// In both cases values with function type used as intermediate representation
366+// for block literal structure.
367+//
368+// In LLVM IR produced by clang, blocks are represented with the following
369+// structure:
370+// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
371+// Pointers to block invoke functions are stored in the third field. Clang
372+// replaces inderect function calls in all cases except if block is passed as a
373+// function argument. Note that it is somewhat unclear if the OpenCL C spec
374+// should allow passing blocks as function argumernts. This pass is not supposed
375+// to work correctly with such functions.
376+// Clang though has to store function pointers to this structure. Purpose of
377+// this pass is to replace store of function pointers(not allowed in SPIR-V)
378+// with null pointers.
379+//
380+//===----------------------------------------------------------------------===//
381+#define DEBUG_TYPE "spv-lower-ocl-blocks"
382+
383+#include "SPIRVInternal.h"
384+
385+#include "llvm/IR/Module.h"
386+#include "llvm/Pass.h"
387+#include "llvm/Support/Regex.h"
388+
389+using namespace llvm;
390+
391+namespace {
392+
393+static bool isBlockInvoke(Function &F) {
394+ static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$");
395+ return BlockInvokeRegex.match(F.getName());
396+}
397+
398+class SPIRVLowerOCLBlocks : public ModulePass {
399+
400+public:
401+ SPIRVLowerOCLBlocks() : ModulePass(ID) {}
402+
403+ bool runOnModule(Module &M) {
404+ bool Changed = false;
405+ for (Function &F : M) {
406+ if (!isBlockInvoke(F))
407+ continue;
408+ for (User *U : F.users()) {
409+ if (!isa<Constant>(U))
410+ continue;
411+ Constant *Null = Constant::getNullValue(U->getType());
412+ if (U != Null) {
413+ U->replaceAllUsesWith(Null);
414+ Changed = true;
415+ }
416+ }
417+ }
418+ return Changed;
419+ }
420+
421+ static char ID;
422+};
423+
424+char SPIRVLowerOCLBlocks::ID = 0;
425+
426+} // namespace
427+
428+INITIALIZE_PASS(
429+ SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks",
430+ "Remove function pointers occured in case of using OpenCL blocks", false,
431+ false)
432+
433+llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() {
434+ return new SPIRVLowerOCLBlocks();
435+}
436diff --git a/test/global_block.ll b/test/global_block.ll
437index a9267d8..efb4cf3 100644
438--- a/test/global_block.ll
439+++ b/test/global_block.ll
440@@ -16,7 +16,7 @@
441 ; RUN: llvm-spirv %t.bc -o %t.spv
442 ; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM
443
444-target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
445+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
446 target triple = "spir-unknown-unknown"
447
448 ; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke"
449@@ -26,71 +26,56 @@ target triple = "spir-unknown-unknown"
450 ; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]]
451 ; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] [[int]]
452
453-;; This variable is not needed in SPIRV
454-; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1
455-; CHECK-LLVM-NOT: @block_kernel.b1
456-@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) addrspace(4)*), align 8
457+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
458
459-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
460+@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4
461+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4
462
463-; Function Attrs: convergent nounwind
464-define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
465+; Function Attrs: convergent noinline nounwind optnone
466+define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
467 entry:
468- %res.addr = alloca i32 addrspace(1)*, align 8
469- store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa !10
470-
471+ %res.addr = alloca i32 addrspace(1)*, align 4
472+ store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4
473 ; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} [[five]]
474 ; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 5)
475- %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
476-
477- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa !10
478- store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14
479+ %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
480+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4
481+ store i32 %call, i32 addrspace(1)* %0, align 4
482 ret void
483 }
484
485-; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]]
486+; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]]
487 ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}}
488 ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}}
489 ; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 %{{.*}})
490-; Function Attrs: convergent nounwind
491+; Function Attrs: convergent noinline nounwind optnone
492 define internal spir_func i32 @_block_invoke(i8 addrspace(4)* %.block_descriptor, i32 %i) #1 {
493 entry:
494- %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
495+ %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
496 %i.addr = alloca i32, align 4
497- store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8
498- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
499- store i32 %i, i32* %i.addr, align 4, !tbaa !14
500- %0 = load i32, i32* %i.addr, align 4, !tbaa !14
501+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
502+ store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
503+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
504+ store i32 %i, i32* %i.addr, align 4
505+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
506+ %0 = load i32, i32* %i.addr, align 4
507 %add = add nsw i32 %0, 1
508 ret i32 %add
509 }
510
511-attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
512-attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
513+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
514+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
515 attributes #2 = { convergent }
516
517 !llvm.module.flags = !{!0}
518-!opencl.enable.FP_CONTRACT = !{}
519 !opencl.ocl.version = !{!1}
520 !opencl.spir.version = !{!1}
521-!opencl.used.extensions = !{!2}
522-!opencl.used.optional.core.features = !{!2}
523-!opencl.compiler.options = !{!2}
524-!llvm.ident = !{!3}
525+!llvm.ident = !{!2}
526
527 !0 = !{i32 1, !"wchar_size", i32 4}
528 !1 = !{i32 2, i32 0}
529-!2 = !{}
530-!3 = !{!"clang version 7.0.0"}
531-!4 = !{i32 1}
532-!5 = !{!"none"}
533-!6 = !{!"int*"}
534-!7 = !{!""}
535-!8 = !{i1 false}
536-!9 = !{i32 0}
537-!10 = !{!11, !11, i64 0}
538-!11 = !{!"any pointer", !12, i64 0}
539-!12 = !{!"omnipotent char", !13, i64 0}
540-!13 = !{!"Simple C/C++ TBAA"}
541-!14 = !{!15, !15, i64 0}
542-!15 = !{!"int", !12, i64 0}
543+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
544+!3 = !{i32 1}
545+!4 = !{!"none"}
546+!5 = !{!"int*"}
547+!6 = !{!""}
548diff --git a/test/literal-struct.ll b/test/literal-struct.ll
549index c52170a..52a731a 100644
550--- a/test/literal-struct.ll
551+++ b/test/literal-struct.ll
552@@ -2,7 +2,7 @@
553 ; structs, i.e. structs whose type has no name. Typicaly clang generate such
554 ; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with
555 ; the following command:
556-; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll
557+; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll
558
559 ; literal-struct.cl:
560 ; void foo()
561@@ -14,25 +14,28 @@
562 ; RUN: llvm-as < %s | llvm-spirv -spirv-text -o %t
563 ; RUN: FileCheck < %t %s
564
565-; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0
566-; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}}
567+; CHECK: TypeInt [[Int:[0-9]+]] 32 0
568+; CHECK: TypeInt [[Int8:[0-9]+]] 8 0
569+; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]]
570+; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]]
571
572 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
573 target triple = "spir"
574
575-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
576+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
577+
578+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4
579 ; CHECK: ConstantComposite [[StructType]]
580
581-; This is artificial case is added to cover ConstantNull instrucitions with TypeStruct.
582-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } zeroinitializer, align 4
583+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4
584 ; CHECK: ConstantNull [[StructType]]
585
586 ; Function Attrs: convergent noinline nounwind optnone
587 define spir_func void @foo() #0 {
588 entry:
589- %myBlock = alloca void () addrspace(4)*, align 4
590- store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4
591- call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
592+ %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4
593+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4
594+ call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
595 ret void
596 }
597
598@@ -40,14 +43,14 @@ entry:
599 define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) #0 {
600 entry:
601 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
602- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
603+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
604 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
605- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
606- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
607+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
608+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
609 ret void
610 }
611
612-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
613+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
614 attributes #1 = { convergent }
615
616 !llvm.module.flags = !{!0}
617@@ -57,4 +60,4 @@ attributes #1 = { convergent }
618
619 !0 = !{i32 1, !"wchar_size", i32 4}
620 !1 = !{i32 2, i32 0}
621-!2 = !{!"clang version 8.0.0 "}
622+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
623diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll
624index 76e29f0..df89b13 100644
625--- a/test/transcoding/block_w_struct_return.ll
626+++ b/test/transcoding/block_w_struct_return.ll
627@@ -16,6 +16,8 @@
628 ; res[tid] = kernelBlock(aa).a - 6;
629 ; }
630
631+; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll
632+
633 ; RUN: llvm-as %s -o %t.bc
634 ; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt
635 ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV
636@@ -27,12 +29,14 @@
637 ; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke"
638
639 ; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32
640+; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8
641+; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]]
642 ; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]]
643 ; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]]
644
645 ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7
646 ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7
647-; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}}
648+; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}}
649 ; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] [[StructRet]] [[BlockLit]] [[StructArg]]
650
651 ; CHECK-LLVM: %[[StructA:.*]] = type { i32 }
652@@ -41,20 +45,21 @@
653 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
654 target triple = "spir64-unknown-unknown"
655
656+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
657 %struct.A = type { i32 }
658
659-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
660+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8
661
662 ; Function Attrs: convergent noinline nounwind optnone
663-define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible !8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location !7 {
664+define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
665 entry:
666 %res.addr = alloca i32 addrspace(1)*, align 8
667- %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8
668+ %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 8
669 %tid = alloca i64, align 8
670 %aa = alloca %struct.A, align 4
671 %tmp = alloca %struct.A, align 4
672 store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8
673- store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void (%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void (%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) addrspace(4)** %kernelBlock, align 8
674+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8
675 %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4
676 store i64 %call, i64* %tid, align 8
677 %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8
678@@ -63,7 +68,7 @@ entry:
679 store i32 -1, i32 addrspace(1)* %arrayidx, align 4
680 %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0
681 store i32 5, i32* %a, align 4
682- call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
683+ call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
684 %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0
685 %2 = load i32, i32* %a1, align 4
686 %sub = sub nsw i32 %2, 6
687@@ -78,10 +83,10 @@ entry:
688 define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval align 4 %a) #1 {
689 entry:
690 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
691- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8
692+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8
693 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8
694- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
695- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 8
696+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
697+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8
698 %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0
699 store i32 6, i32* %a1, align 4
700 %0 = bitcast %struct.A* %agg.result to i8*
701@@ -96,30 +101,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r
702 ; Function Attrs: convergent nounwind readnone
703 declare spir_func i64 @_Z13get_global_idj(i32) #3
704
705-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
706-attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
707+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
708+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
709 attributes #2 = { argmemonly nounwind }
710 attributes #3 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
711 attributes #4 = { convergent nounwind readnone }
712 attributes #5 = { convergent }
713
714 !llvm.module.flags = !{!0}
715-!opencl.enable.FP_CONTRACT = !{}
716 !opencl.ocl.version = !{!1}
717 !opencl.spir.version = !{!1}
718-!opencl.used.extensions = !{!2}
719-!opencl.used.optional.core.features = !{!2}
720-!opencl.compiler.options = !{!2}
721-!llvm.ident = !{!3}
722+!llvm.ident = !{!2}
723
724 !0 = !{i32 1, !"wchar_size", i32 4}
725 !1 = !{i32 2, i32 0}
726-!2 = !{}
727-!3 = !{!"clang version 7.0.0"}
728-!4 = !{i32 1}
729-!5 = !{!"none"}
730-!6 = !{!"int*"}
731-!7 = !{!""}
732-!8 = !{i1 false}
733-!9 = !{i32 0}
734-
735+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
736+!3 = !{i32 1}
737+!4 = !{!"none"}
738+!5 = !{!"int*"}
739+!6 = !{!""}
740diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll
741index 0d29c71..435871d 100644
742--- a/test/transcoding/enqueue_kernel.ll
743+++ b/test/transcoding/enqueue_kernel.ll
744@@ -51,11 +51,12 @@
745 ; ModuleID = 'enqueue_kernel.cl'
746 source_filename = "enqueue_kernel.cl"
747 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
748-target triple = "spir-unknown-unknown"
749+target triple = "spir"
750
751 %opencl.queue_t = type opaque
752 %struct.ndrange_t = type { i32 }
753 %opencl.clk_event_t = type opaque
754+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
755
756 ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel"
757 ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel"
758@@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown"
759
760 ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32
761 ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8
762-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
763 ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0
764-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17
765+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21
766 ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2
767-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20
768-; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
769+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
770+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24
771
772 ; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}}
773+; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
774+; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
775 ; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]]
776 ; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]]
777-; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
778 ; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]]
779 ; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
780 ; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
781 ; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
782 ; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]]
783
784-; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32 }
785-; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }>
786-; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
787-; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32 }>
788+; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32, i8 addrspace(4)* }
789+; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>
790+; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
791+; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)* }>
792
793-; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
794-; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
795+; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4
796+; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4
797
798-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
799-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
800+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4
801+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4
802
803 ; Function Attrs: convergent noinline nounwind optnone
804-define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
805+define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
806 entry:
807+ %a.addr = alloca i32 addrspace(1)*, align 4
808+ %b.addr = alloca i32 addrspace(1)*, align 4
809+ %i.addr = alloca i32, align 4
810+ %c0.addr = alloca i8, align 1
811 %default_queue = alloca %opencl.queue_t*, align 4
812 %flags = alloca i32, align 4
813 %ndrange = alloca %struct.ndrange_t, align 4
814 %clk_event = alloca %opencl.clk_event_t*, align 4
815 %event_wait_list = alloca %opencl.clk_event_t*, align 4
816 %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4
817- %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4
818- %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
819+ %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4
820+ %tmp = alloca %struct.ndrange_t, align 4
821+ %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
822+ %tmp4 = alloca %struct.ndrange_t, align 4
823 %c = alloca i8, align 1
824+ %tmp11 = alloca %struct.ndrange_t, align 4
825+ %block_sizes = alloca [1 x i32], align 4
826+ %tmp12 = alloca %struct.ndrange_t, align 4
827+ %block_sizes13 = alloca [3 x i32], align 4
828+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
829+ store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4
830+ store i32 %i, i32* %i.addr, align 4
831+ store i8 %c0, i8* %c0.addr, align 1
832 store i32 0, i32* %flags, align 4
833 %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
834 %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4
835 store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4
836 %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
837 %2 = load i32, i32* %flags, align 4
838- %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
839- store i32 17, i32* %block.size, align 4
840- %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
841+ %3 = bitcast %struct.ndrange_t* %tmp to i8*
842+ %4 = bitcast %struct.ndrange_t* %ndrange to i8*
843+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false)
844+ %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
845+ store i32 21, i32* %block.size, align 4
846+ %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
847 store i32 4, i32* %block.align, align 4
848- %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
849- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4
850- %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
851- store i32 %i, i32* %block.captured1, align 4
852- %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
853- store i8 %c0, i8* %block.captured2, align 4
854- %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()*
855- %4 = addrspacecast void ()* %3 to i8 addrspace(4)*
856+ %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
857+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4
858+ %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
859+ %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
860+ store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4
861+ %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
862+ %6 = load i32, i32* %i.addr, align 4
863+ store i32 %6, i32* %block.captured1, align 4
864+ %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5
865+ %7 = load i8, i8* %c0.addr, align 1
866+ store i8 %7, i8* %block.captured2, align 4
867+ %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic*
868+ %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)*
869
870 ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]]
871 ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}}
872 ; [[ConstInt0]] [[EventNull]] [[EventNull]]
873 ; [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]]
874
875-; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 addrspace(4)*
876+; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic*
877+; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)*
878 ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)*
879-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]])
880-
881- %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4)
882- %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
883- %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
884- %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
885- store i32 20, i32* %block.size5, align 4
886- %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
887+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]])
888+
889+ %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
890+ %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
891+ %12 = load i32, i32* %flags, align 4
892+ %13 = bitcast %struct.ndrange_t* %tmp4 to i8*
893+ %14 = bitcast %struct.ndrange_t* %ndrange to i8*
894+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false)
895+ %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
896+ %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
897+ %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
898+ store i32 24, i32* %block.size5, align 4
899+ %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
900 store i32 4, i32* %block.align6, align 4
901- %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
902- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4
903- %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
904- store i32 %i, i32* %block.captured8, align 4
905- %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
906- store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4
907- %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to void ()*
908- %9 = addrspacecast void ()* %8 to i8 addrspace(4)*
909+ %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
910+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4
911+ %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
912+ %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
913+ store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4
914+ %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
915+ %18 = load i32, i32* %i.addr, align 4
916+ store i32 %18, i32* %block.captured9, align 4
917+ %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5
918+ %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4
919+ store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4
920+ %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic*
921+ %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)*
922+
923
924 ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]]
925 ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]]
926@@ -158,16 +193,24 @@ entry:
927 ; [[ConstInt2]] [[Event1]] [[Event2]]
928 ; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]]
929
930-; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 addrspace(4)*
931+; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to %struct.__opencl_block_literal_generic*
932+; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4)
933 ; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)*
934-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]])
935-
936- %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
937- %11 = alloca [1 x i32]
938- %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0
939- %13 = load i8, i8* %c, align 1
940- %14 = zext i8 %13 to i32
941- store i32 %14, i32* %12, align 4
942+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]])
943+
944+ %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21)
945+ %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
946+ %24 = load i32, i32* %flags, align 4
947+ %25 = bitcast %struct.ndrange_t* %tmp11 to i8*
948+ %26 = bitcast %struct.ndrange_t* %ndrange to i8*
949+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false)
950+ %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
951+ %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)*
952+ %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
953+ %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0
954+ %30 = load i8, i8* %c, align 1
955+ %31 = zext i8 %30 to i32
956+ store i32 %31, i32* %29, align 4
957
958 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]]
959 ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]]
960@@ -182,14 +225,18 @@ entry:
961 ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)*
962 ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}})
963
964- %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %12)
965- %16 = alloca [3 x i32]
966- %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0
967- store i32 1, i32* %17, align 4
968- %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1
969- store i32 2, i32* %18, align 4
970- %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2
971- store i32 4, i32* %19, align 4
972+ %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29)
973+ %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
974+ %34 = load i32, i32* %flags, align 4
975+ %35 = bitcast %struct.ndrange_t* %tmp12 to i8*
976+ %36 = bitcast %struct.ndrange_t* %ndrange to i8*
977+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false)
978+ %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0
979+ store i32 1, i32* %37, align 4
980+ %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1
981+ store i32 2, i32* %38, align 4
982+ %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2
983+ store i32 4, i32* %39, align 4
984
985 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]]
986 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]]
987@@ -206,24 +253,27 @@ entry:
988 ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)*
989 ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}})
990
991- %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17)
992+ %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37)
993 ret void
994 }
995
996+; Function Attrs: argmemonly nounwind
997+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1
998+
999 ; Function Attrs: convergent noinline nounwind optnone
1000 define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #2 {
1001 entry:
1002 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
1003- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
1004+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
1005 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1006- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
1007- store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
1008- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
1009+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
1010+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
1011+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5
1012 %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4
1013 %conv = sext i8 %0 to i32
1014- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 2
1015+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
1016 %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4
1017- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
1018+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
1019 %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4
1020 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2
1021 store i32 %conv, i32 addrspace(1)* %arrayidx, align 4
1022@@ -243,19 +293,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i
1023 define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 {
1024 entry:
1025 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
1026- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
1027+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
1028 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1029- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
1030- store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
1031- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
1032+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
1033+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
1034+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5
1035 %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4
1036- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
1037+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
1038 %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4
1039 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1
1040 %2 = load i32, i32 addrspace(1)* %arrayidx, align 4
1041- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2
1042+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
1043 %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4
1044- %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
1045+ %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
1046 %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4
1047 %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4
1048 store i32 %2, i32 addrspace(1)* %arrayidx4, align 4
1049@@ -276,11 +326,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac
1050 entry:
1051 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
1052 %p.addr = alloca i8 addrspace(3)*, align 4
1053- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
1054+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
1055 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1056- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
1057+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
1058 store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4
1059- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
1060+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
1061 ret void
1062 }
1063
1064@@ -300,13 +350,13 @@ entry:
1065 %p1.addr = alloca i8 addrspace(3)*, align 4
1066 %p2.addr = alloca i8 addrspace(3)*, align 4
1067 %p3.addr = alloca i8 addrspace(3)*, align 4
1068- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
1069+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
1070 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1071- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
1072+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
1073 store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4
1074 store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4
1075 store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4
1076- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
1077+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
1078 ret void
1079 }
1080
1081@@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*,
1082 ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*)
1083 ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)
1084
1085-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
1086+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
1087 attributes #1 = { argmemonly nounwind }
1088-attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
1089+attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
1090 attributes #3 = { nounwind }
1091
1092 !llvm.module.flags = !{!0}
1093-!opencl.enable.FP_CONTRACT = !{}
1094 !opencl.ocl.version = !{!1}
1095 !opencl.spir.version = !{!1}
1096-!opencl.used.extensions = !{!2}
1097-!opencl.used.optional.core.features = !{!2}
1098-!opencl.compiler.options = !{!2}
1099-!llvm.ident = !{!3}
1100+!llvm.ident = !{!2}
1101
1102 !0 = !{i32 1, !"wchar_size", i32 4}
1103 !1 = !{i32 2, i32 0}
1104-!2 = !{}
1105-!3 = !{!"clang version 7.0.0"}
1106-!4 = !{i32 1, i32 1, i32 0, i32 0}
1107-!5 = !{!"none", !"none", !"none", !"none"}
1108-!6 = !{!"int*", !"int*", !"int", !"char"}
1109-!7 = !{!"", !"", !"", !""}
1110-!8 = !{i1 false, i1 false, i1 false, i1 false}
1111-!9 = !{i32 0, i32 0, i32 0, i32 0}
1112+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
1113+!3 = !{i32 1, i32 1, i32 0, i32 0}
1114+!4 = !{!"none", !"none", !"none", !"none"}
1115+!5 = !{!"int*", !"int*", !"int", !"char"}
1116+!6 = !{!"", !"", !"", !""}
1117--
11181.8.3.1
1119
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch
new file mode 100644
index 0000000..9d25bba
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch
@@ -0,0 +1,66 @@
1From 7bbd0058362ac3bb5edd7a82d43e1785810776b3 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Fri, 29 Mar 2019 08:56:53 +0800
4Subject: [PATCH] dont export targets for binaries
5
6The projects using LLVM cmake modules look for target binaries in
7sysroot as a result which isn't desirable in this case and isn't needed
8either.
9
10Upstream-Status: Inappropriate [cross-compile specific]
11
12Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
13---
14 llvm/cmake/modules/AddLLVM.cmake | 9 ---------
15 llvm/cmake/modules/TableGen.cmake | 6 ------
16 2 files changed, 15 deletions(-)
17
18diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
19index 0df6845..b79f4fa 100644
20--- a/llvm/cmake/modules/AddLLVM.cmake
21+++ b/llvm/cmake/modules/AddLLVM.cmake
22@@ -866,12 +866,6 @@ macro(add_llvm_tool name)
23
24 if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
25 if( LLVM_BUILD_TOOLS )
26- if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
27- NOT LLVM_DISTRIBUTION_COMPONENTS)
28- set(export_to_llvmexports EXPORT LLVMExports)
29- set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True)
30- endif()
31-
32 install(TARGETS ${name}
33 ${export_to_llvmexports}
34 RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR}
35@@ -884,9 +878,6 @@ macro(add_llvm_tool name)
36 endif()
37 endif()
38 endif()
39- if( LLVM_BUILD_TOOLS )
40- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name})
41- endif()
42 set_target_properties(${name} PROPERTIES FOLDER "Tools")
43 endmacro(add_llvm_tool name)
44
45diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
46index 3c84ae7..141219f 100644
47--- a/llvm/cmake/modules/TableGen.cmake
48+++ b/llvm/cmake/modules/TableGen.cmake
49@@ -164,14 +164,8 @@ macro(add_tablegen target project)
50 endif()
51
52 if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
53- if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
54- NOT LLVM_DISTRIBUTION_COMPONENTS)
55- set(export_to_llvmexports EXPORT LLVMExports)
56- endif()
57-
58 install(TARGETS ${target}
59 ${export_to_llvmexports}
60 RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR})
61 endif()
62- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${target})
63 endmacro()
64--
652.7.4
66
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch
new file mode 100644
index 0000000..0dfc537
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch
@@ -0,0 +1,59 @@
1From 6c33fb58869ffb17106047c45ab8d3856966eaf7 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Tue, 26 Mar 2019 14:11:29 +0800
4Subject: [PATCH] point to correct clang project and tblgen
5
6Point to correct path for clang project as per the way we unpack. Also
7let llvm-tblgen path be passed from recipe itself.
8
9Also since we're going to do the patching ourselves, no need to look for
10git through cmake.
11
12Upstream-Status: Inappropriate [OE specific]
13---
14 CMakeLists.txt | 8 ++++----
15 1 file changed, 4 insertions(+), 4 deletions(-)
16
17diff --git a/CMakeLists.txt b/CMakeLists.txt
18index 174133b..c769f08 100644
19--- a/CMakeLists.txt
20+++ b/CMakeLists.txt
21@@ -53,7 +53,7 @@ endif(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
22 include(AddLLVM)
23 include(TableGen)
24
25-find_package(Git REQUIRED)
26+#find_package(Git REQUIRED)
27
28 if (NOT WIN32)
29 add_subdirectory( linux_linker )
30@@ -80,7 +80,7 @@ set(TARGET_NAME ${COMMON_CLANG_LIBRARY_NAME}${BUILD_PLATFORM} )
31
32 if(NOT USE_PREBUILT_LLVM)
33 set(TARGET_BRANCH "ocl-open-80")
34- set(CLANG_SOURCE_DIR ${LLVM_SOURCE_DIR}/tools/clang)
35+ set(CLANG_SOURCE_DIR ${LLVM_SOURCE_DIR}/../clang)
36 set(CLANG_BASE_REVISION a03da8be08a208122e292016cb6cea1f30229677)
37
38 set(SPIRV_SOURCE_DIR ${LLVM_SOURCE_DIR}/projects/llvm-spirv)
39@@ -102,7 +102,7 @@ endif(NOT USE_PREBUILT_LLVM)
40 set (COMPILE_OPTIONS_TD opencl_clang_options.td)
41 set (COMPILE_OPTIONS_INC opencl_clang_options.inc)
42
43-set(LLVM_TABLEGEN_EXE "llvm-tblgen")
44+#set(LLVM_TABLEGEN_EXE "llvm-tblgen")
45 set(LLVM_TARGET_DEFINITIONS ${COMPILE_OPTIONS_TD})
46 if(USE_PREBUILT_LLVM)
47 set(TABLEGEN_ADDITIONAL -I ${LLVM_INCLUDE_DIRS})
48@@ -153,7 +153,7 @@ endif()
49
50 if(NOT USE_PREBUILT_LLVM)
51 set(CLANG_BINARY_DIR ${LLVM_BINARY_DIR}/tools/clang/)
52- set(CLANG_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}/tools/clang/)
53+ set(CLANG_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}/../clang/)
54 include_directories(
55 ${CLANG_BINARY_DIR}/include # for tablegened includes
56 ${CLANG_SOURCE_DIR}/include # for basic headers
57--
582.19.1
59
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch
new file mode 100644
index 0000000..2e935a1
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch
@@ -0,0 +1,294 @@
1From c94ec28600255098ffb9d73d1b386a7c8a535590 Mon Sep 17 00:00:00 2001
2From: Andrew Savonichev <andrew.savonichev@intel.com>
3Date: Thu, 21 Feb 2019 11:02:10 +0000
4Subject: [PATCH 2/2] [OpenCL] Simplify LLVM IR generated for OpenCL blocks
5
6Summary:
7Emit direct call of block invoke functions when possible, i.e. in case the
8block is not passed as a function argument.
9Also doing some refactoring of `CodeGenFunction::EmitBlockCallExpr()`
10
11Reviewers: Anastasia, yaxunl, svenvh
12
13Reviewed By: Anastasia
14
15Subscribers: cfe-commits
16
17Tags: #clang
18
19Differential Revision: https://reviews.llvm.org/D58388
20
21git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354568 91177308-0d34-0410-b5e6-96231b3b80d8
22
23Upstream-Status: Backport
24[https://github.com/llvm-mirror/clang/commit/eae71f8d05ce550c4e2595c9b7082cc2c7882c58]
25Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
26---
27 lib/CodeGen/CGBlocks.cpp | 77 +++++++++++++-------------
28 lib/CodeGen/CGOpenCLRuntime.cpp | 30 +++++++---
29 lib/CodeGen/CGOpenCLRuntime.h | 4 ++
30 test/CodeGenOpenCL/blocks.cl | 10 +---
31 test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 34 +++++++++---
32 5 files changed, 91 insertions(+), 64 deletions(-)
33
34diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
35index fa3c3ee..10a0238 100644
36--- a/lib/CodeGen/CGBlocks.cpp
37+++ b/lib/CodeGen/CGBlocks.cpp
38@@ -1261,52 +1261,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
39 ReturnValueSlot ReturnValue) {
40 const BlockPointerType *BPT =
41 E->getCallee()->getType()->getAs<BlockPointerType>();
42-
43 llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
44-
45- // Get a pointer to the generic block literal.
46- // For OpenCL we generate generic AS void ptr to be able to reuse the same
47- // block definition for blocks with captures generated as private AS local
48- // variables and without captures generated as global AS program scope
49- // variables.
50- unsigned AddrSpace = 0;
51- if (getLangOpts().OpenCL)
52- AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
53-
54- llvm::Type *BlockLiteralTy =
55- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
56-
57- // Bitcast the callee to a block literal.
58- BlockPtr =
59- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
60-
61- // Get the function pointer from the literal.
62- llvm::Value *FuncPtr =
63- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
64- CGM.getLangOpts().OpenCL ? 2 : 3);
65-
66- // Add the block literal.
67+ llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType();
68+ llvm::Value *Func = nullptr;
69+ QualType FnType = BPT->getPointeeType();
70+ ASTContext &Ctx = getContext();
71 CallArgList Args;
72
73- QualType VoidPtrQualTy = getContext().VoidPtrTy;
74- llvm::Type *GenericVoidPtrTy = VoidPtrTy;
75 if (getLangOpts().OpenCL) {
76- GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType();
77- VoidPtrQualTy =
78- getContext().getPointerType(getContext().getAddrSpaceQualType(
79- getContext().VoidTy, LangAS::opencl_generic));
80- }
81-
82- BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy);
83- Args.add(RValue::get(BlockPtr), VoidPtrQualTy);
84-
85- QualType FnType = BPT->getPointeeType();
86+ // For OpenCL, BlockPtr is already casted to generic block literal.
87+
88+ // First argument of a block call is a generic block literal casted to
89+ // generic void pointer, i.e. i8 addrspace(4)*
90+ llvm::Value *BlockDescriptor = Builder.CreatePointerCast(
91+ BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType());
92+ QualType VoidPtrQualTy = Ctx.getPointerType(
93+ Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic));
94+ Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy);
95+ // And the rest of the arguments.
96+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
97+
98+ // We *can* call the block directly unless it is a function argument.
99+ if (!isa<ParmVarDecl>(E->getCalleeDecl()))
100+ Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
101+ else {
102+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2);
103+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
104+ }
105+ } else {
106+ // Bitcast the block literal to a generic block literal.
107+ BlockPtr = Builder.CreatePointerCast(
108+ BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal");
109+ // Get pointer to the block invoke function
110+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3);
111
112- // And the rest of the arguments.
113- EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
114+ // First argument is a block literal casted to a void pointer
115+ BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy);
116+ Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy);
117+ // And the rest of the arguments.
118+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
119
120- // Load the function.
121- llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
122+ // Load the function.
123+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
124+ }
125
126 const FunctionType *FuncTy = FnType->castAs<FunctionType>();
127 const CGFunctionInfo &FnInfo =
128diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
129index 7f6f595..75003e5 100644
130--- a/lib/CodeGen/CGOpenCLRuntime.cpp
131+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
132@@ -123,6 +123,23 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
133 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
134 }
135
136+// Get the block literal from an expression derived from the block expression.
137+// OpenCL v2.0 s6.12.5:
138+// Block variable declarations are implicitly qualified with const. Therefore
139+// all block variables must be initialized at declaration time and may not be
140+// reassigned.
141+static const BlockExpr *getBlockExpr(const Expr *E) {
142+ const Expr *Prev = nullptr; // to make sure we do not stuck in infinite loop.
143+ while(!isa<BlockExpr>(E) && E != Prev) {
144+ Prev = E;
145+ E = E->IgnoreCasts();
146+ if (auto DR = dyn_cast<DeclRefExpr>(E)) {
147+ E = cast<VarDecl>(DR->getDecl())->getInit();
148+ }
149+ }
150+ return cast<BlockExpr>(E);
151+}
152+
153 /// Record emitted llvm invoke function and llvm block literal for the
154 /// corresponding block expression.
155 void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
156@@ -137,20 +154,17 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
157 EnqueuedBlockMap[E].Kernel = nullptr;
158 }
159
160+llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
161+ return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
162+}
163+
164 CGOpenCLRuntime::EnqueuedBlockInfo
165 CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
166 CGF.EmitScalarExpr(E);
167
168 // The block literal may be assigned to a const variable. Chasing down
169 // to get the block literal.
170- if (auto DR = dyn_cast<DeclRefExpr>(E)) {
171- E = cast<VarDecl>(DR->getDecl())->getInit();
172- }
173- E = E->IgnoreImplicit();
174- if (auto Cast = dyn_cast<CastExpr>(E)) {
175- E = Cast->getSubExpr();
176- }
177- auto *Block = cast<BlockExpr>(E);
178+ const BlockExpr *Block = getBlockExpr(E);
179
180 assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
181 "Block expression not emitted");
182diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h
183index 750721f..4effc7e 100644
184--- a/lib/CodeGen/CGOpenCLRuntime.h
185+++ b/lib/CodeGen/CGOpenCLRuntime.h
186@@ -92,6 +92,10 @@ public:
187 /// \param Block block literal emitted for the block expression.
188 void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
189 llvm::Value *Block);
190+
191+ /// \return LLVM block invoke function emitted for an expression derived from
192+ /// the block expression.
193+ llvm::Function *getInvokeFunction(const Expr *E);
194 };
195
196 }
197diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
198index 19aacc3..ab5a2c6 100644
199--- a/test/CodeGenOpenCL/blocks.cl
200+++ b/test/CodeGenOpenCL/blocks.cl
201@@ -39,11 +39,8 @@ void foo(){
202 // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
203 // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
204 // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
205- // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
206 // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
207- // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
208- // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
209- // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
210+ // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
211 // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
212 // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
213 // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
214@@ -53,11 +50,8 @@ void foo(){
215 // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
216 // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
217 // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
218- // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
219 // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
220- // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
221- // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)*
222- // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]])
223+ // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
224
225 int (^ block_B)(void) = ^{
226 return i;
227diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
228index 8445016..1566912 100644
229--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
230+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
231@@ -312,9 +312,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
232 };
233
234 // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
235- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
236- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
237- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
238+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
239 block_A();
240
241 // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
242@@ -333,15 +331,35 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
243 unsigned size = get_kernel_work_group_size(block_A);
244
245 // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
246- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
247- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
248- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
249+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
250 block_A();
251
252+ // Make sure that block invoke function is resolved correctly after sequence of assignements.
253+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
254+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
255+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
256+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
257+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1,
258+ bl_t b1 = block_G;
259+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
260+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
261+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
262+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
263+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2,
264+ bl_t b2 = b1;
265+ // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
266+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*)
267+ // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
268+ b2(0);
269+ // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
270+ // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
271+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
272+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
273+ size = get_kernel_preferred_work_group_size_multiple(b2);
274+
275 void (^block_C)(void) = ^{
276 callee(i, a);
277 };
278-
279 // Emits block literal on stack and block kernel [[INVLK3]].
280 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
281 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
282@@ -404,8 +422,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
283 // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
284 // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
285 // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
286+// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
287 // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
288 // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
289-// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
290 // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
291 // COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
292--
2931.8.3.1
294
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch
new file mode 100644
index 0000000..510c7c6
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch
@@ -0,0 +1,61 @@
1From 29e2813a2ab7d5569860bb07892dfef7b5374d96 Mon Sep 17 00:00:00 2001
2From: Yaxun Liu <Yaxun.Liu@amd.com>
3Date: Tue, 26 Feb 2019 16:20:41 +0000
4Subject: [PATCH] [OpenCL] Fix assertion due to blocks
5
6A recent change caused assertion in CodeGenFunction::EmitBlockCallExpr when a block is called.
7
8There is code
9
10 Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
11getCalleeDecl calls Expr::getReferencedDeclOfCallee, which does not handle
12BlockExpr and returns nullptr, which causes isa to assert.
13
14This patch fixes that.
15
16Differential Revision: https://reviews.llvm.org/D58658
17
18
19git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354893 91177308-0d34-0410-b5e6-96231b3b80d8
20
21Upstream-Status: Backport
22[https://github.com/llvm-mirror/clang/commit/29e2813a2ab7d5569860bb07892dfef7b5374d96]
23Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
24---
25 lib/AST/Expr.cpp | 2 ++
26 test/CodeGenOpenCL/blocks.cl | 6 ++++++
27 2 files changed, 8 insertions(+)
28
29diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
30index aef1eab..85690c7 100644
31--- a/lib/AST/Expr.cpp
32+++ b/lib/AST/Expr.cpp
33@@ -1358,6 +1358,8 @@ Decl *Expr::getReferencedDeclOfCallee() {
34 return DRE->getDecl();
35 if (MemberExpr *ME = dyn_cast<MemberExpr>(CEE))
36 return ME->getMemberDecl();
37+ if (auto *BE = dyn_cast<BlockExpr>(CEE))
38+ return BE->getBlockDecl();
39
40 return nullptr;
41 }
42diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
43index ab5a2c6..c3e2685 100644
44--- a/test/CodeGenOpenCL/blocks.cl
45+++ b/test/CodeGenOpenCL/blocks.cl
46@@ -90,6 +90,12 @@ int get42() {
47 return blockArgFunc(^{return 42;});
48 }
49
50+// COMMON-LABEL: define {{.*}}@call_block
51+// call {{.*}}@__call_block_block_invoke
52+int call_block() {
53+ return ^int(int num) { return num; } (11);
54+}
55+
56 // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size"
57 // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align"
58
59--
601.8.3.1
61
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
new file mode 100644
index 0000000..f536f0f
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
@@ -0,0 +1,16 @@
1FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:"
2
3SRC_URI_append_intel-x86-common = " \
4 git://github.com/intel/opencl-clang.git;protocol=https;branch=ocl-open-80;destsuffix=git/llvm/projects/opencl-clang;name=opencl-clang \
5 git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_80;destsuffix=git/llvm/projects/llvm-spirv;name=spirv \
6 file://0001-point-to-correct-clang.patch;patchdir=llvm/projects/opencl-clang \
7 file://0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch;patchdir=clang \
8 file://0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch;patchdir=clang \
9 file://0003-OpenCL-Fix-assertion-due-to-blocks.patch;patchdir=clang \
10 file://0001-dont-export-targets-for-binaries.patch \
11 file://0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch;patchdir=llvm/projects/llvm-spirv \
12 "
13
14SRCREV_opencl-clang = "daf5e4dd718477ae8cf89a283c653939d9182f15"
15SRCREV_spirv = "bd0f28fb92061d49c0f120b4dac3fd8956006745"
16