summaryrefslogtreecommitdiffstats
path: root/dynamic-layers/clang-layer
diff options
context:
space:
mode:
Diffstat (limited to 'dynamic-layers/clang-layer')
-rwxr-xr-xdynamic-layers/clang-layer/recipes-core/dnn/files/run-ptest15
-rw-r--r--dynamic-layers/clang-layer/recipes-core/dnn/onednn_3.4.1.bb57
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend13
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch156
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch1119
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch66
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch59
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch294
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch61
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend16
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/allow-to-find-cpp-generation-tool.patch51
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/disable-werror.patch16
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/external-ocloc.patch40
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/f10439aea214984a060566831f63d3aa198ef1b8.patch54
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime_24.13.29138.7.bb65
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch35
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch31
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-fix-tblgen.patch24
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/igc/files/0003-Improve-Reproducibility-for-src-package.patch34
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_1.0.16510.2.bb75
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch49
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-Request-native-clang-only-when-cross-compiling-464.patch60
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang.inc34
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_15.0.0.bb5
24 files changed, 645 insertions, 1784 deletions
diff --git a/dynamic-layers/clang-layer/recipes-core/dnn/files/run-ptest b/dynamic-layers/clang-layer/recipes-core/dnn/files/run-ptest
new file mode 100755
index 00000000..82d4df58
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-core/dnn/files/run-ptest
@@ -0,0 +1,15 @@
1#!/bin/sh
2# Run the api-c and test_c_symbols-c binaries from ./tests and print one PASS:/FAIL: line for each, based on its exit status.
3cd tests
4./api-c
5if [ $? -eq 0 ]; then
6 echo 'PASS: api-c'
7else
8 echo 'FAIL: api-c'
9fi
10./test_c_symbols-c
11if [ $? -eq 0 ]; then
12 echo 'PASS: test_c_symbols-c'
13else
14 echo 'FAIL: test_c_symbols-c'
15fi
diff --git a/dynamic-layers/clang-layer/recipes-core/dnn/onednn_3.4.1.bb b/dynamic-layers/clang-layer/recipes-core/dnn/onednn_3.4.1.bb
new file mode 100644
index 00000000..2fa3f627
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-core/dnn/onednn_3.4.1.bb
@@ -0,0 +1,57 @@
1HOMEPAGE = "https://www.oneapi.com"
2SUMMARY = "Deep Neural Network Library"
3DESCRIPTION = "This software is a user mode library that accelerates \
4deep-learning applications and frameworks on Intel architecture."
5LICENSE = "Apache-2.0 & BSD-3-Clause & BSL-1.0"
6LIC_FILES_CHKSUM = "file://LICENSE;md5=3b64000f6e7d52516017622a37a94ce9 \
7 file://tests/gtests/gtest/LICENSE;md5=cbbd27594afd089daa160d3a16dd515a \
8 file://src/cpu/x64/xbyak/COPYRIGHT;md5=3b9bf048d063d54cdb28964db558bcc7 \
9 file://src/common/ittnotify/LICENSE.BSD;md5=e671ff178b24a95a382ba670503c66fb \
10 "
11SECTION = "lib"
12
13inherit pkgconfig cmake ptest
14
15DNN_BRANCH = "rls-v${@'.'.join(d.getVar('PV').split('.')[0:2])}"
16
17S = "${WORKDIR}/git"
18SRCREV = "f5ff0a6de16c130053bec1a1aec3a9b826c66f78"
19SRC_URI = "git://github.com/oneapi-src/oneDNN.git;branch=${DNN_BRANCH};protocol=https \
20 file://run-ptest \
21 "
22
23UPSTREAM_CHECK_GITTAGREGEX = "^v(?P<pver>(\d+(\.\d+)+))$"
24
25CVE_PRODUCT = "intel:math_kernel_library"
26
27COMPATIBLE_HOST = '(x86_64).*-linux'
28COMPATIBLE_HOST:libc-musl = 'null'
29
30EXTRA_OECMAKE += " \
31 -DDNNL_LIBRARY_TYPE=SHARED \
32 -DDNNL_BUILD_EXAMPLES=ON \
33 -DDNNL_BUILD_TESTS=ON \
34 -DDNNL_CPU_RUNTIME=OMP \
35 -DDNNL_ARCH_OPT_FLAGS="" \
36 -DCMAKE_SKIP_RPATH=ON \
37 -DONEDNN_BUILD_GRAPH=OFF \
38 "
39
40PACKAGECONFIG ??= "gpu"
41PACKAGECONFIG[gpu] = "-DDNNL_GPU_RUNTIME=OCL, , opencl-headers virtual/opencl-icd, intel-compute-runtime"
42# Also install the benchdnn binary and its input files under ${bindir}/mkl-dnn/tests/benchdnn.
43do_install:append () {
44 install -d ${D}${bindir}/mkl-dnn/tests/benchdnn/inputs
45 install -m 0755 ${B}/tests/benchdnn/benchdnn ${D}${bindir}/mkl-dnn/tests/benchdnn
46 cp -r ${B}/tests/benchdnn/inputs/* ${D}${bindir}/mkl-dnn/tests/benchdnn/inputs
47}
48# Install the api-c and test_c_symbols-c test binaries from the build tree into ${PTEST_PATH}/tests.
49do_install_ptest () {
50 install -d ${D}${PTEST_PATH}/tests
51 install -m 0755 ${B}/tests/api-c ${D}${PTEST_PATH}/tests
52 install -m 0755 ${B}/tests/test_c_symbols-c ${D}${PTEST_PATH}/tests
53}
54
55PACKAGES =+ "${PN}-test"
56
57FILES:${PN}-test = "${bindir}/mkl-dnn/*"
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend
deleted file mode 100644
index f8d5a252..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend
+++ /dev/null
@@ -1,13 +0,0 @@
1FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:"
2
3DEPENDS_append = " opencl-clang-native"
4LLVM_TARGETS_TO_BUILD = "X86"
5
6do_install_append_intel-x86-common() {
7 DESTDIR=${D} ninja -v install-cmake-exports
8}
9
10LIBCPLUSPLUS = ""
11
12# undefined reference to `__atomic_load' on i*86.
13COMPATIBLE_HOST = '(x86_64).*-linux'
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch
deleted file mode 100644
index 1c491402..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch
+++ /dev/null
@@ -1,156 +0,0 @@
1From 39a3ac0065c23d1e2d55dfd8792cc28a146a4307 Mon Sep 17 00:00:00 2001
2From: Alexey Bader <alexey.bader@intel.com>
3Date: Tue, 19 Feb 2019 15:19:06 +0000
4Subject: [PATCH 1/2] [OpenCL] Change type of block pointer for OpenCL
5
6Summary:
7
8For some reason OpenCL blocks in LLVM IR are represented as function pointers.
9These pointers do not point to any real function and never get called. Actually
10they point to some structure, which in turn contains pointer to the real block
11invoke function.
12This patch changes represntation of OpenCL blocks in LLVM IR from function
13pointers to pointers to `%struct.__block_literal_generic`.
14Such representation allows to avoid unnecessary bitcasts and simplifies
15further processing (e.g. translation to SPIR-V ) of the module for targets
16which do not support function pointers.
17
18Patch by: Alexey Sotkin.
19
20Reviewers: Anastasia, yaxunl, svenvh
21
22Reviewed By: Anastasia
23
24Subscribers: alexbatashev, cfe-commits
25
26Tags: #clang
27
28Differential Revision: https://reviews.llvm.org/D58277
29
30git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354337 91177308-0d34-0410-b5e6-96231b3b80d8
31
32Upstream-Status: Backport
33[https://github.com/llvm-mirror/clang/commit/283f308bdb5893bab1f36791711346e746045f94]
34Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
35---
36 lib/CodeGen/CodeGenTypes.cpp | 4 +++-
37 test/CodeGenOpenCL/blocks.cl | 18 ++++++++----------
38 test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 18 +++++++++---------
39 3 files changed, 20 insertions(+), 20 deletions(-)
40
41diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
42index 2acf1ac..93b3ebf 100644
43--- a/lib/CodeGen/CodeGenTypes.cpp
44+++ b/lib/CodeGen/CodeGenTypes.cpp
45@@ -637,7 +637,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
46
47 case Type::BlockPointer: {
48 const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType();
49- llvm::Type *PointeeType = ConvertTypeForMem(FTy);
50+ llvm::Type *PointeeType = CGM.getLangOpts().OpenCL
51+ ? CGM.getGenericBlockLiteralType()
52+ : ConvertTypeForMem(FTy);
53 unsigned AS = Context.getTargetAddressSpace(FTy);
54 ResultType = llvm::PointerType::get(PointeeType, AS);
55 break;
56diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
57index 675240c..19aacc3 100644
58--- a/test/CodeGenOpenCL/blocks.cl
59+++ b/test/CodeGenOpenCL/blocks.cl
60@@ -35,11 +35,10 @@ void foo(){
61 // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
62 // SPIR: %[[i_value:.*]] = load i32, i32* %i
63 // SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
64- // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
65- // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
66- // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
67- // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
68- // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
69+ // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic*
70+ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
71+ // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
72+ // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
73 // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
74 // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
75 // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
76@@ -50,11 +49,10 @@ void foo(){
77 // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
78 // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
79 // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
80- // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
81- // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
82- // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
83- // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
84- // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic*
85+ // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)*
86+ // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
87+ // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
88+ // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
89 // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
90 // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
91 // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
92diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
93index 4732194..8445016 100644
94--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
95+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
96@@ -11,7 +11,7 @@ typedef struct {int a;} ndrange_t;
97
98 // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
99 // COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
100-// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
101+// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*)
102
103 // For anonymous blocks without captures, emit block literals as global variable.
104 // COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
105@@ -77,9 +77,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
106 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
107 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
108 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
109- // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
110- // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
111- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
112+ // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic*
113+ // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic*
114+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
115 // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
116 // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
117 // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
118@@ -95,8 +95,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
119 // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
120 // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
121 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
122- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
123- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
124+ // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic*
125+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
126 // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
127 // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
128 // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
129@@ -300,13 +300,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
130 // Emits global block literal [[BLG8]] and invoke function [[INVG8]].
131 // The full type of these expressions are long (and repeated elsewhere), so we
132 // capture it as part of the regex for convenience and clarity.
133- // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
134+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A
135 void (^const block_A)(void) = ^{
136 return;
137 };
138
139 // Emits global block literal [[BLG9]] and invoke function [[INVG9]].
140- // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
141+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B
142 void (^const block_B)(local void *) = ^(local void *a) {
143 return;
144 };
145@@ -346,7 +346,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
146 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
147 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
148 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
149- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
150+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)*
151 // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
152 // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
153 // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
154--
1551.8.3.1
156
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
deleted file mode 100644
index 4a528674..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
+++ /dev/null
@@ -1,1119 +0,0 @@
1From 9ce0fe02fd6cda5fb29fbb0d5037a1798a810b8a Mon Sep 17 00:00:00 2001
2From: Alexey Sotkin <alexey.sotkin@intel.com>
3Date: Thu, 21 Feb 2019 17:14:36 +0300
4Subject: [PATCH 1/3] Update LowerOpenCL pass to handle new blocks
5 represntation in LLVM IR
6
7Upstream-Status: Backport
8[https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/bd6ddfaf7232cd81c7f2fe9877e66f286731bd8e]
9Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
10---
11 lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 413 ++++++++----------------------
12 test/global_block.ll | 71 ++---
13 test/literal-struct.ll | 31 ++-
14 test/transcoding/block_w_struct_return.ll | 47 ++--
15 test/transcoding/enqueue_kernel.ll | 237 ++++++++++-------
16 5 files changed, 317 insertions(+), 482 deletions(-)
17
18diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
19index 50e1838..b42a4ec 100644
20--- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
21+++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
22@@ -1,303 +1,110 @@
23-//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===//
24-//
25-// The LLVM/SPIRV Translator
26-//
27-// This file is distributed under the University of Illinois Open Source
28-// License. See LICENSE.TXT for details.
29-//
30-// Copyright (c) 2018 Intel Corporation. All rights reserved.
31-//
32-// Permission is hereby granted, free of charge, to any person obtaining a
33-// copy of this software and associated documentation files (the "Software"),
34-// to deal with the Software without restriction, including without limitation
35-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
36-// and/or sell copies of the Software, and to permit persons to whom the
37-// Software is furnished to do so, subject to the following conditions:
38-//
39-// Redistributions of source code must retain the above copyright notice,
40-// this list of conditions and the following disclaimers.
41-// Redistributions in binary form must reproduce the above copyright notice,
42-// this list of conditions and the following disclaimers in the documentation
43-// and/or other materials provided with the distribution.
44-// Neither the names of Intel Corporation, nor the names of its
45-// contributors may be used to endorse or promote products derived from this
46-// Software without specific prior written permission.
47-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
48-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
49-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
50-// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
51-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
52-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
53-// THE SOFTWARE.
54-//
55-//===----------------------------------------------------------------------===//
56-//
57-// SPIR-V specification doesn't allow function pointers, so SPIR-V translator
58-// is designed to fail if a value with function type (except calls) is occured.
59-// Currently there is only two cases, when function pointers are generating in
60-// LLVM IR in OpenCL - block calls and device side enqueue built-in calls.
61-//
62-// In both cases values with function type used as intermediate representation
63-// for block literal structure.
64-//
65-// This pass is designed to find such cases and simplify them to avoid any
66-// function pointer types occurrences in LLVM IR in 4 steps.
67-//
68-// 1. Find all function pointer allocas, like
69-// %block = alloca void () *
70-//
71-// Then find a single store to that alloca:
72-// %blockLit = alloca <{ i32, i32, ...}>, align 4
73-// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()*
74-// > store void ()* %0, void ()** %block, align 4
75-//
76-// And replace the alloca users by new instructions which used stored value
77-// %blockLit itself instead of function pointer alloca %block.
78-//
79-// 2. Find consecutive casts from block literal type to i8 addrspace(4)*
80-// used function pointers as an intermediate type:
81-// %0 = bitcast <{ i32, i32 }> %block to void() *
82-// %1 = addrspacecast void() * %0 to i8 addrspace(4)*
83-// And simplify them:
84-// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)*
85-//
86-// 3. Find all unused instructions with function pointer type occured after
87-// pp.1-2 and remove them.
88-//
89-// 4. Find unused globals with function pointer type, like
90-// @block = constant void ()*
91-// bitcast ({ i32, i32 }* @__block_literal_global to void ()*
92-//
93-// And remove them.
94-//
95-//===----------------------------------------------------------------------===//
96-#define DEBUG_TYPE "spv-lower-ocl-blocks"
97-
98-#include "OCLUtil.h"
99-#include "SPIRVInternal.h"
100-
101-#include "llvm/ADT/SetVector.h"
102-#include "llvm/Analysis/ValueTracking.h"
103-#include "llvm/IR/GlobalVariable.h"
104-#include "llvm/IR/InstIterator.h"
105-#include "llvm/IR/Module.h"
106-#include "llvm/Pass.h"
107-#include "llvm/PassSupport.h"
108-#include "llvm/Support/Casting.h"
109-
110-using namespace llvm;
111-
112-namespace {
113-
114-static void
115-removeUnusedFunctionPtrInst(Instruction *I,
116- SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
117- for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) {
118- Instruction *OpI = dyn_cast<Instruction>(I->getOperand(OpIdx));
119- I->setOperand(OpIdx, nullptr);
120- if (OpI && OpI != I && OpI->user_empty())
121- FuncPtrInsts.insert(OpI);
122- }
123- I->eraseFromParent();
124-}
125-
126-static bool isFuncPtrAlloca(const AllocaInst *AI) {
127- auto *ET = dyn_cast<PointerType>(AI->getAllocatedType());
128- return ET && ET->getElementType()->isFunctionTy();
129-}
130-
131-static bool hasFuncPtrType(const Value *V) {
132- auto *PT = dyn_cast<PointerType>(V->getType());
133- return PT && PT->getElementType()->isFunctionTy();
134-}
135-
136-static bool isFuncPtrInst(const Instruction *I) {
137- if (auto *AI = dyn_cast<AllocaInst>(I))
138- return isFuncPtrAlloca(AI);
139-
140- for (auto &Op : I->operands()) {
141- if (auto *AI = dyn_cast<AllocaInst>(Op))
142- return isFuncPtrAlloca(AI);
143-
144- auto *OpI = dyn_cast<Instruction>(&Op);
145- if (OpI && OpI != I && hasFuncPtrType(OpI))
146- return true;
147- }
148- return false;
149-}
150-
151-static StoreInst *findSingleStore(AllocaInst *AI) {
152- StoreInst *Store = nullptr;
153- for (auto *U : AI->users()) {
154- if (!isa<StoreInst>(U))
155- continue; // not a store
156- if (Store)
157- return nullptr; // there are more than one stores
158- Store = dyn_cast<StoreInst>(U);
159- }
160- return Store;
161-}
162-
163-static void fixFunctionPtrAllocaUsers(AllocaInst *AI) {
164- // Find and remove a single store to alloca
165- auto *SingleStore = findSingleStore(AI);
166- assert(SingleStore && "More than one store to the function pointer alloca");
167- auto *StoredVal = SingleStore->getValueOperand();
168- SingleStore->eraseFromParent();
169-
170- // Find loads from the alloca and replace thier users
171- for (auto *U : AI->users()) {
172- auto *LI = dyn_cast<LoadInst>(U);
173- if (!LI)
174- continue;
175-
176- for (auto *U : LI->users()) {
177- auto *UInst = cast<Instruction>(U);
178- auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast(
179- StoredVal, UInst->getType(), "", UInst);
180- UInst->replaceAllUsesWith(Cast);
181- }
182- }
183-}
184-
185-static int getBlockLiteralIdx(const Function &F) {
186- StringRef FName = F.getName();
187- if (isEnqueueKernelBI(FName))
188- return FName.contains("events") ? 7 : 4;
189- if (isKernelQueryBI(FName))
190- return FName.contains("for_ndrange") ? 2 : 1;
191- if (FName.startswith("__") && FName.contains("_block_invoke"))
192- return F.hasStructRetAttr() ? 1 : 0;
193-
194- return -1; // No block literal argument
195-}
196-
197-static bool hasBlockLiteralArg(const Function &F) {
198- return getBlockLiteralIdx(F) != -1;
199-}
200-
201-static bool simplifyFunctionPtrCasts(Function &F) {
202- bool Changed = false;
203- int BlockLiteralIdx = getBlockLiteralIdx(F);
204- for (auto *U : F.users()) {
205- auto *Call = dyn_cast<CallInst>(U);
206- if (!Call)
207- continue;
208- if (Call->getFunction()->getName() == F.getName().str() + "_kernel")
209- continue; // Skip block invoke function calls inside block invoke kernels
210-
211- const DataLayout &DL = F.getParent()->getDataLayout();
212- auto *BlockLiteral = Call->getOperand(BlockLiteralIdx);
213- auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL);
214- if (isa<GlobalVariable>(BlockLiteralVal))
215- continue; // nothing to do with globals
216-
217- auto *BlockLiteralAlloca = cast<AllocaInst>(BlockLiteralVal);
218- assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() &&
219- "Function type shouldn't be there");
220-
221- auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast(
222- BlockLiteralAlloca, BlockLiteral->getType(), "", Call);
223- BlockLiteral->replaceAllUsesWith(NewBlockLiteral);
224- Changed |= true;
225- }
226- return Changed;
227-}
228-
229-static void
230-findFunctionPtrAllocas(Module &M,
231- SmallVectorImpl<AllocaInst *> &FuncPtrAllocas) {
232- for (auto &F : M) {
233- if (F.isDeclaration())
234- continue;
235- for (auto &I : instructions(F)) {
236- auto *AI = dyn_cast<AllocaInst>(&I);
237- if (!AI || !isFuncPtrAlloca(AI))
238- continue;
239- FuncPtrAllocas.push_back(AI);
240- }
241- }
242-}
243-
244-static void
245-findUnusedFunctionPtrInsts(Module &M,
246- SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
247- for (auto &F : M) {
248- if (F.isDeclaration())
249- continue;
250- for (auto &I : instructions(F))
251- if (I.user_empty() && isFuncPtrInst(&I))
252- FuncPtrInsts.insert(&I);
253- }
254-}
255-
256-static void
257-findUnusedFunctionPtrGlbs(Module &M,
258- SmallVectorImpl<GlobalVariable *> &FuncPtrGlbs) {
259- for (auto &GV : M.globals()) {
260- if (!GV.user_empty())
261- continue;
262- auto *GVType = dyn_cast<PointerType>(GV.getType()->getElementType());
263- if (GVType && GVType->getElementType()->isFunctionTy())
264- FuncPtrGlbs.push_back(&GV);
265- }
266-}
267-
268-class SPIRVLowerOCLBlocks : public ModulePass {
269-
270-public:
271- SPIRVLowerOCLBlocks() : ModulePass(ID) {}
272-
273- bool runOnModule(Module &M) {
274- bool Changed = false;
275-
276- // 1. Find function pointer allocas and fix their users
277- SmallVector<AllocaInst *, 16> FuncPtrAllocas;
278- findFunctionPtrAllocas(M, FuncPtrAllocas);
279-
280- Changed |= !FuncPtrAllocas.empty();
281- for (auto *AI : FuncPtrAllocas)
282- fixFunctionPtrAllocaUsers(AI);
283-
284- // 2. Simplify consecutive casts which use function pointer types
285- for (auto &F : M)
286- if (hasBlockLiteralArg(F))
287- Changed |= simplifyFunctionPtrCasts(F);
288-
289- // 3. Cleanup unused instructions with function pointer type
290- // which are occured after pp. 1-2
291- SmallSetVector<Instruction *, 16> FuncPtrInsts;
292- findUnusedFunctionPtrInsts(M, FuncPtrInsts);
293-
294- Changed |= !FuncPtrInsts.empty();
295- while (!FuncPtrInsts.empty()) {
296- Instruction *I = FuncPtrInsts.pop_back_val();
297- removeUnusedFunctionPtrInst(I, FuncPtrInsts);
298- }
299-
300- // 4. Find and remove unused global variables with function pointer type
301- SmallVector<GlobalVariable *, 16> FuncPtrGlbs;
302- findUnusedFunctionPtrGlbs(M, FuncPtrGlbs);
303-
304- Changed |= !FuncPtrGlbs.empty();
305- for (auto *GV : FuncPtrGlbs)
306- GV->eraseFromParent();
307-
308- return Changed;
309- }
310-
311- static char ID;
312-}; // class SPIRVLowerOCLBlocks
313-
314-char SPIRVLowerOCLBlocks::ID = 0;
315-
316-} // namespace
317-
318-INITIALIZE_PASS(
319- SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks",
320- "Remove function pointers occured in case of using OpenCL blocks", false,
321- false)
322-
323-llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() {
324- return new SPIRVLowerOCLBlocks();
325-}
326+//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===//
327+//
328+// The LLVM/SPIRV Translator
329+//
330+// This file is distributed under the University of Illinois Open Source
331+// License. See LICENSE.TXT for details.
332+//
333+// Copyright (c) 2018 Intel Corporation. All rights reserved.
334+//
335+// Permission is hereby granted, free of charge, to any person obtaining a
336+// copy of this software and associated documentation files (the "Software"),
337+// to deal with the Software without restriction, including without limitation
338+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
339+// and/or sell copies of the Software, and to permit persons to whom the
340+// Software is furnished to do so, subject to the following conditions:
341+//
342+// Redistributions of source code must retain the above copyright notice,
343+// this list of conditions and the following disclaimers.
344+// Redistributions in binary form must reproduce the above copyright notice,
345+// this list of conditions and the following disclaimers in the documentation
346+// and/or other materials provided with the distribution.
347+// Neither the names of Intel Corporation, nor the names of its
348+// contributors may be used to endorse or promote products derived from this
349+// Software without specific prior written permission.
350+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
351+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
352+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
353+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
354+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
355+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
356+// THE SOFTWARE.
357+//
358+//===----------------------------------------------------------------------===//
359+//
360+// SPIR-V specification doesn't allow function pointers, so SPIR-V translator
361+// is designed to fail if a value with function type (except calls) occurs.
362+// Currently there are only two cases where function pointers are generated in
363+// LLVM IR in OpenCL - block calls and device side enqueue built-in calls.
364+//
365+// In both cases values with function type are used as an intermediate
366+// representation for the block literal structure.
367+//
368+// In LLVM IR produced by clang, blocks are represented with the following
369+// structure:
370+// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
371+// Pointers to block invoke functions are stored in the third field. Clang
372+// replaces indirect function calls in all cases except if block is passed as a
373+// function argument. Note that it is somewhat unclear if the OpenCL C spec
374+// should allow passing blocks as function arguments. This pass is not supposed
375+// to work correctly with such functions.
376+// Clang though has to store function pointers to this structure. Purpose of
377+// this pass is to replace stores of function pointers (not allowed in SPIR-V)
378+// with null pointers.
379+//
380+//===----------------------------------------------------------------------===//
381+#define DEBUG_TYPE "spv-lower-ocl-blocks"
382+
383+#include "SPIRVInternal.h"
384+
385+#include "llvm/IR/Module.h"
386+#include "llvm/Pass.h"
387+#include "llvm/Support/Regex.h"
388+
389+using namespace llvm;
390+
391+namespace {
392+
393+static bool isBlockInvoke(Function &F) {
394+ static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$");
395+ return BlockInvokeRegex.match(F.getName());
396+}
397+
398+class SPIRVLowerOCLBlocks : public ModulePass {
399+
400+public:
401+ SPIRVLowerOCLBlocks() : ModulePass(ID) {}
402+
403+ bool runOnModule(Module &M) {
404+ bool Changed = false;
405+ for (Function &F : M) {
406+ if (!isBlockInvoke(F))
407+ continue;
408+ for (User *U : F.users()) {
409+ if (!isa<Constant>(U))
410+ continue;
411+ Constant *Null = Constant::getNullValue(U->getType());
412+ if (U != Null) {
413+ U->replaceAllUsesWith(Null);
414+ Changed = true;
415+ }
416+ }
417+ }
418+ return Changed;
419+ }
420+
421+ static char ID;
422+};
423+
424+char SPIRVLowerOCLBlocks::ID = 0;
425+
426+} // namespace
427+
428+INITIALIZE_PASS(
429+ SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks",
430+ "Remove function pointers occured in case of using OpenCL blocks", false,
431+ false)
432+
433+llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() {
434+ return new SPIRVLowerOCLBlocks();
435+}
436diff --git a/test/global_block.ll b/test/global_block.ll
437index a9267d8..efb4cf3 100644
438--- a/test/global_block.ll
439+++ b/test/global_block.ll
440@@ -16,7 +16,7 @@
441 ; RUN: llvm-spirv %t.bc -o %t.spv
442 ; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM
443
444-target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
445+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
446 target triple = "spir-unknown-unknown"
447
448 ; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke"
449@@ -26,71 +26,56 @@ target triple = "spir-unknown-unknown"
450 ; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]]
451 ; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] [[int]]
452
453-;; This variable is not needed in SPIRV
454-; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1
455-; CHECK-LLVM-NOT: @block_kernel.b1
456-@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) addrspace(4)*), align 8
457+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
458
459-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
460+@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4
461+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4
462
463-; Function Attrs: convergent nounwind
464-define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
465+; Function Attrs: convergent noinline nounwind optnone
466+define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
467 entry:
468- %res.addr = alloca i32 addrspace(1)*, align 8
469- store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa !10
470-
471+ %res.addr = alloca i32 addrspace(1)*, align 4
472+ store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4
473 ; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} [[five]]
474 ; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 5)
475- %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
476-
477- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa !10
478- store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14
479+ %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
480+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4
481+ store i32 %call, i32 addrspace(1)* %0, align 4
482 ret void
483 }
484
485-; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]]
486+; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]]
487 ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}}
488 ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}}
489 ; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 %{{.*}})
490-; Function Attrs: convergent nounwind
491+; Function Attrs: convergent noinline nounwind optnone
492 define internal spir_func i32 @_block_invoke(i8 addrspace(4)* %.block_descriptor, i32 %i) #1 {
493 entry:
494- %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
495+ %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
496 %i.addr = alloca i32, align 4
497- store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8
498- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
499- store i32 %i, i32* %i.addr, align 4, !tbaa !14
500- %0 = load i32, i32* %i.addr, align 4, !tbaa !14
501+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
502+ store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
503+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
504+ store i32 %i, i32* %i.addr, align 4
505+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
506+ %0 = load i32, i32* %i.addr, align 4
507 %add = add nsw i32 %0, 1
508 ret i32 %add
509 }
510
511-attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
512-attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
513+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
514+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
515 attributes #2 = { convergent }
516
517 !llvm.module.flags = !{!0}
518-!opencl.enable.FP_CONTRACT = !{}
519 !opencl.ocl.version = !{!1}
520 !opencl.spir.version = !{!1}
521-!opencl.used.extensions = !{!2}
522-!opencl.used.optional.core.features = !{!2}
523-!opencl.compiler.options = !{!2}
524-!llvm.ident = !{!3}
525+!llvm.ident = !{!2}
526
527 !0 = !{i32 1, !"wchar_size", i32 4}
528 !1 = !{i32 2, i32 0}
529-!2 = !{}
530-!3 = !{!"clang version 7.0.0"}
531-!4 = !{i32 1}
532-!5 = !{!"none"}
533-!6 = !{!"int*"}
534-!7 = !{!""}
535-!8 = !{i1 false}
536-!9 = !{i32 0}
537-!10 = !{!11, !11, i64 0}
538-!11 = !{!"any pointer", !12, i64 0}
539-!12 = !{!"omnipotent char", !13, i64 0}
540-!13 = !{!"Simple C/C++ TBAA"}
541-!14 = !{!15, !15, i64 0}
542-!15 = !{!"int", !12, i64 0}
543+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
544+!3 = !{i32 1}
545+!4 = !{!"none"}
546+!5 = !{!"int*"}
547+!6 = !{!""}
548diff --git a/test/literal-struct.ll b/test/literal-struct.ll
549index c52170a..52a731a 100644
550--- a/test/literal-struct.ll
551+++ b/test/literal-struct.ll
552@@ -2,7 +2,7 @@
553 ; structs, i.e. structs whose type has no name. Typicaly clang generate such
554 ; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with
555 ; the following command:
556-; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll
557+; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll
558
559 ; literal-struct.cl:
560 ; void foo()
561@@ -14,25 +14,28 @@
562 ; RUN: llvm-as < %s | llvm-spirv -spirv-text -o %t
563 ; RUN: FileCheck < %t %s
564
565-; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0
566-; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}}
567+; CHECK: TypeInt [[Int:[0-9]+]] 32 0
568+; CHECK: TypeInt [[Int8:[0-9]+]] 8 0
569+; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]]
570+; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]]
571
572 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
573 target triple = "spir"
574
575-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
576+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
577+
578+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4
579 ; CHECK: ConstantComposite [[StructType]]
580
581-; This is artificial case is added to cover ConstantNull instrucitions with TypeStruct.
582-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } zeroinitializer, align 4
583+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4
584 ; CHECK: ConstantNull [[StructType]]
585
586 ; Function Attrs: convergent noinline nounwind optnone
587 define spir_func void @foo() #0 {
588 entry:
589- %myBlock = alloca void () addrspace(4)*, align 4
590- store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4
591- call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
592+ %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4
593+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4
594+ call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
595 ret void
596 }
597
598@@ -40,14 +43,14 @@ entry:
599 define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) #0 {
600 entry:
601 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
602- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
603+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
604 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
605- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
606- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
607+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
608+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
609 ret void
610 }
611
612-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
613+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
614 attributes #1 = { convergent }
615
616 !llvm.module.flags = !{!0}
617@@ -57,4 +60,4 @@ attributes #1 = { convergent }
618
619 !0 = !{i32 1, !"wchar_size", i32 4}
620 !1 = !{i32 2, i32 0}
621-!2 = !{!"clang version 8.0.0 "}
622+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
623diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll
624index 76e29f0..df89b13 100644
625--- a/test/transcoding/block_w_struct_return.ll
626+++ b/test/transcoding/block_w_struct_return.ll
627@@ -16,6 +16,8 @@
628 ; res[tid] = kernelBlock(aa).a - 6;
629 ; }
630
631+; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll
632+
633 ; RUN: llvm-as %s -o %t.bc
634 ; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt
635 ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV
636@@ -27,12 +29,14 @@
637 ; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke"
638
639 ; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32
640+; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8
641+; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]]
642 ; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]]
643 ; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]]
644
645 ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7
646 ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7
647-; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}}
648+; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}}
649 ; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] [[StructRet]] [[BlockLit]] [[StructArg]]
650
651 ; CHECK-LLVM: %[[StructA:.*]] = type { i32 }
652@@ -41,20 +45,21 @@
653 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
654 target triple = "spir64-unknown-unknown"
655
656+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
657 %struct.A = type { i32 }
658
659-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
660+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8
661
662 ; Function Attrs: convergent noinline nounwind optnone
663-define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible !8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location !7 {
664+define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
665 entry:
666 %res.addr = alloca i32 addrspace(1)*, align 8
667- %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8
668+ %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 8
669 %tid = alloca i64, align 8
670 %aa = alloca %struct.A, align 4
671 %tmp = alloca %struct.A, align 4
672 store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8
673- store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void (%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void (%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) addrspace(4)** %kernelBlock, align 8
674+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8
675 %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4
676 store i64 %call, i64* %tid, align 8
677 %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8
678@@ -63,7 +68,7 @@ entry:
679 store i32 -1, i32 addrspace(1)* %arrayidx, align 4
680 %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0
681 store i32 5, i32* %a, align 4
682- call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
683+ call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
684 %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0
685 %2 = load i32, i32* %a1, align 4
686 %sub = sub nsw i32 %2, 6
687@@ -78,10 +83,10 @@ entry:
688 define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval align 4 %a) #1 {
689 entry:
690 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
691- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8
692+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8
693 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8
694- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
695- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 8
696+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
697+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8
698 %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0
699 store i32 6, i32* %a1, align 4
700 %0 = bitcast %struct.A* %agg.result to i8*
701@@ -96,30 +101,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r
702 ; Function Attrs: convergent nounwind readnone
703 declare spir_func i64 @_Z13get_global_idj(i32) #3
704
705-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
706-attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
707+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
708+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
709 attributes #2 = { argmemonly nounwind }
710 attributes #3 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
711 attributes #4 = { convergent nounwind readnone }
712 attributes #5 = { convergent }
713
714 !llvm.module.flags = !{!0}
715-!opencl.enable.FP_CONTRACT = !{}
716 !opencl.ocl.version = !{!1}
717 !opencl.spir.version = !{!1}
718-!opencl.used.extensions = !{!2}
719-!opencl.used.optional.core.features = !{!2}
720-!opencl.compiler.options = !{!2}
721-!llvm.ident = !{!3}
722+!llvm.ident = !{!2}
723
724 !0 = !{i32 1, !"wchar_size", i32 4}
725 !1 = !{i32 2, i32 0}
726-!2 = !{}
727-!3 = !{!"clang version 7.0.0"}
728-!4 = !{i32 1}
729-!5 = !{!"none"}
730-!6 = !{!"int*"}
731-!7 = !{!""}
732-!8 = !{i1 false}
733-!9 = !{i32 0}
734-
735+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
736+!3 = !{i32 1}
737+!4 = !{!"none"}
738+!5 = !{!"int*"}
739+!6 = !{!""}
740diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll
741index 0d29c71..435871d 100644
742--- a/test/transcoding/enqueue_kernel.ll
743+++ b/test/transcoding/enqueue_kernel.ll
744@@ -51,11 +51,12 @@
745 ; ModuleID = 'enqueue_kernel.cl'
746 source_filename = "enqueue_kernel.cl"
747 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
748-target triple = "spir-unknown-unknown"
749+target triple = "spir"
750
751 %opencl.queue_t = type opaque
752 %struct.ndrange_t = type { i32 }
753 %opencl.clk_event_t = type opaque
754+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
755
756 ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel"
757 ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel"
758@@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown"
759
760 ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32
761 ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8
762-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
763 ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0
764-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17
765+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21
766 ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2
767-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20
768-; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
769+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
770+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24
771
772 ; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}}
773+; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
774+; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
775 ; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]]
776 ; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]]
777-; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
778 ; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]]
779 ; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
780 ; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
781 ; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
782 ; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]]
783
784-; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32 }
785-; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }>
786-; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
787-; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32 }>
788+; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32, i8 addrspace(4)* }
789+; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>
790+; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
791+; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)* }>
792
793-; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
794-; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
795+; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4
796+; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4
797
798-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
799-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
800+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4
801+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4
802
803 ; Function Attrs: convergent noinline nounwind optnone
804-define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
805+define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
806 entry:
807+ %a.addr = alloca i32 addrspace(1)*, align 4
808+ %b.addr = alloca i32 addrspace(1)*, align 4
809+ %i.addr = alloca i32, align 4
810+ %c0.addr = alloca i8, align 1
811 %default_queue = alloca %opencl.queue_t*, align 4
812 %flags = alloca i32, align 4
813 %ndrange = alloca %struct.ndrange_t, align 4
814 %clk_event = alloca %opencl.clk_event_t*, align 4
815 %event_wait_list = alloca %opencl.clk_event_t*, align 4
816 %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4
817- %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4
818- %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
819+ %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4
820+ %tmp = alloca %struct.ndrange_t, align 4
821+ %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
822+ %tmp4 = alloca %struct.ndrange_t, align 4
823 %c = alloca i8, align 1
824+ %tmp11 = alloca %struct.ndrange_t, align 4
825+ %block_sizes = alloca [1 x i32], align 4
826+ %tmp12 = alloca %struct.ndrange_t, align 4
827+ %block_sizes13 = alloca [3 x i32], align 4
828+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
829+ store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4
830+ store i32 %i, i32* %i.addr, align 4
831+ store i8 %c0, i8* %c0.addr, align 1
832 store i32 0, i32* %flags, align 4
833 %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
834 %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4
835 store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4
836 %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
837 %2 = load i32, i32* %flags, align 4
838- %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
839- store i32 17, i32* %block.size, align 4
840- %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
841+ %3 = bitcast %struct.ndrange_t* %tmp to i8*
842+ %4 = bitcast %struct.ndrange_t* %ndrange to i8*
843+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false)
844+ %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
845+ store i32 21, i32* %block.size, align 4
846+ %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
847 store i32 4, i32* %block.align, align 4
848- %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
849- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4
850- %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
851- store i32 %i, i32* %block.captured1, align 4
852- %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
853- store i8 %c0, i8* %block.captured2, align 4
854- %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()*
855- %4 = addrspacecast void ()* %3 to i8 addrspace(4)*
856+ %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
857+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4
858+ %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
859+ %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
860+ store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4
861+ %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
862+ %6 = load i32, i32* %i.addr, align 4
863+ store i32 %6, i32* %block.captured1, align 4
864+ %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5
865+ %7 = load i8, i8* %c0.addr, align 1
866+ store i8 %7, i8* %block.captured2, align 4
867+ %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic*
868+ %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)*
869
870 ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]]
871 ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}}
872 ; [[ConstInt0]] [[EventNull]] [[EventNull]]
873 ; [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]]
874
875-; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 addrspace(4)*
876+; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic*
877+; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)*
878 ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)*
879-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]])
880-
881- %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4)
882- %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
883- %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
884- %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
885- store i32 20, i32* %block.size5, align 4
886- %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
887+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]])
888+
889+ %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
890+ %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
891+ %12 = load i32, i32* %flags, align 4
892+ %13 = bitcast %struct.ndrange_t* %tmp4 to i8*
893+ %14 = bitcast %struct.ndrange_t* %ndrange to i8*
894+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false)
895+ %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
896+ %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
897+ %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
898+ store i32 24, i32* %block.size5, align 4
899+ %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
900 store i32 4, i32* %block.align6, align 4
901- %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
902- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4
903- %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
904- store i32 %i, i32* %block.captured8, align 4
905- %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
906- store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4
907- %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to void ()*
908- %9 = addrspacecast void ()* %8 to i8 addrspace(4)*
909+ %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
910+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4
911+ %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
912+ %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
913+ store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4
914+ %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
915+ %18 = load i32, i32* %i.addr, align 4
916+ store i32 %18, i32* %block.captured9, align 4
917+ %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5
918+ %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4
919+ store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4
920+ %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic*
921+ %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)*
922+
923
924 ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]]
925 ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]]
926@@ -158,16 +193,24 @@ entry:
927 ; [[ConstInt2]] [[Event1]] [[Event2]]
928 ; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]]
929
930-; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 addrspace(4)*
931+; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to %struct.__opencl_block_literal_generic*
932+; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4)
933 ; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)*
934-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]])
935-
936- %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
937- %11 = alloca [1 x i32]
938- %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0
939- %13 = load i8, i8* %c, align 1
940- %14 = zext i8 %13 to i32
941- store i32 %14, i32* %12, align 4
942+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]])
943+
944+ %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21)
945+ %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
946+ %24 = load i32, i32* %flags, align 4
947+ %25 = bitcast %struct.ndrange_t* %tmp11 to i8*
948+ %26 = bitcast %struct.ndrange_t* %ndrange to i8*
949+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false)
950+ %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
951+ %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)*
952+ %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
953+ %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0
954+ %30 = load i8, i8* %c, align 1
955+ %31 = zext i8 %30 to i32
956+ store i32 %31, i32* %29, align 4
957
958 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]]
959 ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]]
960@@ -182,14 +225,18 @@ entry:
961 ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)*
962 ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}})
963
964- %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %12)
965- %16 = alloca [3 x i32]
966- %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0
967- store i32 1, i32* %17, align 4
968- %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1
969- store i32 2, i32* %18, align 4
970- %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2
971- store i32 4, i32* %19, align 4
972+ %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29)
973+ %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
974+ %34 = load i32, i32* %flags, align 4
975+ %35 = bitcast %struct.ndrange_t* %tmp12 to i8*
976+ %36 = bitcast %struct.ndrange_t* %ndrange to i8*
977+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false)
978+ %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0
979+ store i32 1, i32* %37, align 4
980+ %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1
981+ store i32 2, i32* %38, align 4
982+ %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2
983+ store i32 4, i32* %39, align 4
984
985 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]]
986 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]]
987@@ -206,24 +253,27 @@ entry:
988 ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)*
989 ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}})
990
991- %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17)
992+ %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37)
993 ret void
994 }
995
996+; Function Attrs: argmemonly nounwind
997+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1
998+
999 ; Function Attrs: convergent noinline nounwind optnone
1000 define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #2 {
1001 entry:
1002 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
1003- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
1004+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
1005 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1006- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
1007- store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
1008- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
1009+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
1010+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
1011+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5
1012 %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4
1013 %conv = sext i8 %0 to i32
1014- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 2
1015+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
1016 %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4
1017- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
1018+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
1019 %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4
1020 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2
1021 store i32 %conv, i32 addrspace(1)* %arrayidx, align 4
1022@@ -243,19 +293,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i
1023 define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 {
1024 entry:
1025 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
1026- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
1027+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
1028 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1029- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
1030- store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
1031- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
1032+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
1033+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
1034+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5
1035 %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4
1036- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
1037+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
1038 %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4
1039 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1
1040 %2 = load i32, i32 addrspace(1)* %arrayidx, align 4
1041- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2
1042+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
1043 %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4
1044- %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
1045+ %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
1046 %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4
1047 %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4
1048 store i32 %2, i32 addrspace(1)* %arrayidx4, align 4
1049@@ -276,11 +326,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac
1050 entry:
1051 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
1052 %p.addr = alloca i8 addrspace(3)*, align 4
1053- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
1054+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
1055 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1056- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
1057+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
1058 store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4
1059- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
1060+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
1061 ret void
1062 }
1063
1064@@ -300,13 +350,13 @@ entry:
1065 %p1.addr = alloca i8 addrspace(3)*, align 4
1066 %p2.addr = alloca i8 addrspace(3)*, align 4
1067 %p3.addr = alloca i8 addrspace(3)*, align 4
1068- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
1069+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
1070 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1071- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
1072+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
1073 store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4
1074 store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4
1075 store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4
1076- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
1077+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
1078 ret void
1079 }
1080
1081@@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*,
1082 ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*)
1083 ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)
1084
1085-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
1086+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
1087 attributes #1 = { argmemonly nounwind }
1088-attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
1089+attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
1090 attributes #3 = { nounwind }
1091
1092 !llvm.module.flags = !{!0}
1093-!opencl.enable.FP_CONTRACT = !{}
1094 !opencl.ocl.version = !{!1}
1095 !opencl.spir.version = !{!1}
1096-!opencl.used.extensions = !{!2}
1097-!opencl.used.optional.core.features = !{!2}
1098-!opencl.compiler.options = !{!2}
1099-!llvm.ident = !{!3}
1100+!llvm.ident = !{!2}
1101
1102 !0 = !{i32 1, !"wchar_size", i32 4}
1103 !1 = !{i32 2, i32 0}
1104-!2 = !{}
1105-!3 = !{!"clang version 7.0.0"}
1106-!4 = !{i32 1, i32 1, i32 0, i32 0}
1107-!5 = !{!"none", !"none", !"none", !"none"}
1108-!6 = !{!"int*", !"int*", !"int", !"char"}
1109-!7 = !{!"", !"", !"", !""}
1110-!8 = !{i1 false, i1 false, i1 false, i1 false}
1111-!9 = !{i32 0, i32 0, i32 0, i32 0}
1112+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
1113+!3 = !{i32 1, i32 1, i32 0, i32 0}
1114+!4 = !{!"none", !"none", !"none", !"none"}
1115+!5 = !{!"int*", !"int*", !"int", !"char"}
1116+!6 = !{!"", !"", !"", !""}
1117--
11181.8.3.1
1119
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch
deleted file mode 100644
index 9d25bbad..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch
+++ /dev/null
@@ -1,66 +0,0 @@
1From 7bbd0058362ac3bb5edd7a82d43e1785810776b3 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Fri, 29 Mar 2019 08:56:53 +0800
4Subject: [PATCH] dont export targets for binaries
5
6The projects using LLVM cmake modules look for target binaries in
7sysroot as a result which isn't desirable in this case and isn't needed
8either.
9
10Upstream-Status: Inappropriate [cross-compile specific]
11
12Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
13---
14 llvm/cmake/modules/AddLLVM.cmake | 9 ---------
15 llvm/cmake/modules/TableGen.cmake | 6 ------
16 2 files changed, 15 deletions(-)
17
18diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
19index 0df6845..b79f4fa 100644
20--- a/llvm/cmake/modules/AddLLVM.cmake
21+++ b/llvm/cmake/modules/AddLLVM.cmake
22@@ -866,12 +866,6 @@ macro(add_llvm_tool name)
23
24 if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
25 if( LLVM_BUILD_TOOLS )
26- if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
27- NOT LLVM_DISTRIBUTION_COMPONENTS)
28- set(export_to_llvmexports EXPORT LLVMExports)
29- set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True)
30- endif()
31-
32 install(TARGETS ${name}
33 ${export_to_llvmexports}
34 RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR}
35@@ -884,9 +878,6 @@ macro(add_llvm_tool name)
36 endif()
37 endif()
38 endif()
39- if( LLVM_BUILD_TOOLS )
40- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name})
41- endif()
42 set_target_properties(${name} PROPERTIES FOLDER "Tools")
43 endmacro(add_llvm_tool name)
44
45diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
46index 3c84ae7..141219f 100644
47--- a/llvm/cmake/modules/TableGen.cmake
48+++ b/llvm/cmake/modules/TableGen.cmake
49@@ -164,14 +164,8 @@ macro(add_tablegen target project)
50 endif()
51
52 if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
53- if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
54- NOT LLVM_DISTRIBUTION_COMPONENTS)
55- set(export_to_llvmexports EXPORT LLVMExports)
56- endif()
57-
58 install(TARGETS ${target}
59 ${export_to_llvmexports}
60 RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR})
61 endif()
62- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${target})
63 endmacro()
64--
652.7.4
66
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch
deleted file mode 100644
index 0dfc537b..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch
+++ /dev/null
@@ -1,59 +0,0 @@
1From 6c33fb58869ffb17106047c45ab8d3856966eaf7 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Tue, 26 Mar 2019 14:11:29 +0800
4Subject: [PATCH] point to correct clang project and tblgen
5
6Point to correct path for clang project as per the way we unpack. Also
7let llvm-tblgen path be passed from recipe itself.
8
9Also since we're going to do the patching ourselves, no need to look for
10git through cmake.
11
12Upstream-Status: Inappropriate [OE specific]
13---
14 CMakeLists.txt | 8 ++++----
15 1 file changed, 4 insertions(+), 4 deletions(-)
16
17diff --git a/CMakeLists.txt b/CMakeLists.txt
18index 174133b..c769f08 100644
19--- a/CMakeLists.txt
20+++ b/CMakeLists.txt
21@@ -53,7 +53,7 @@ endif(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
22 include(AddLLVM)
23 include(TableGen)
24
25-find_package(Git REQUIRED)
26+#find_package(Git REQUIRED)
27
28 if (NOT WIN32)
29 add_subdirectory( linux_linker )
30@@ -80,7 +80,7 @@ set(TARGET_NAME ${COMMON_CLANG_LIBRARY_NAME}${BUILD_PLATFORM} )
31
32 if(NOT USE_PREBUILT_LLVM)
33 set(TARGET_BRANCH "ocl-open-80")
34- set(CLANG_SOURCE_DIR ${LLVM_SOURCE_DIR}/tools/clang)
35+ set(CLANG_SOURCE_DIR ${LLVM_SOURCE_DIR}/../clang)
36 set(CLANG_BASE_REVISION a03da8be08a208122e292016cb6cea1f30229677)
37
38 set(SPIRV_SOURCE_DIR ${LLVM_SOURCE_DIR}/projects/llvm-spirv)
39@@ -102,7 +102,7 @@ endif(NOT USE_PREBUILT_LLVM)
40 set (COMPILE_OPTIONS_TD opencl_clang_options.td)
41 set (COMPILE_OPTIONS_INC opencl_clang_options.inc)
42
43-set(LLVM_TABLEGEN_EXE "llvm-tblgen")
44+#set(LLVM_TABLEGEN_EXE "llvm-tblgen")
45 set(LLVM_TARGET_DEFINITIONS ${COMPILE_OPTIONS_TD})
46 if(USE_PREBUILT_LLVM)
47 set(TABLEGEN_ADDITIONAL -I ${LLVM_INCLUDE_DIRS})
48@@ -153,7 +153,7 @@ endif()
49
50 if(NOT USE_PREBUILT_LLVM)
51 set(CLANG_BINARY_DIR ${LLVM_BINARY_DIR}/tools/clang/)
52- set(CLANG_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}/tools/clang/)
53+ set(CLANG_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}/../clang/)
54 include_directories(
55 ${CLANG_BINARY_DIR}/include # for tablegened includes
56 ${CLANG_SOURCE_DIR}/include # for basic headers
57--
582.19.1
59
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch
deleted file mode 100644
index 2e935a13..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch
+++ /dev/null
@@ -1,294 +0,0 @@
1From c94ec28600255098ffb9d73d1b386a7c8a535590 Mon Sep 17 00:00:00 2001
2From: Andrew Savonichev <andrew.savonichev@intel.com>
3Date: Thu, 21 Feb 2019 11:02:10 +0000
4Subject: [PATCH 2/2] [OpenCL] Simplify LLVM IR generated for OpenCL blocks
5
6Summary:
7Emit direct call of block invoke functions when possible, i.e. in case the
8block is not passed as a function argument.
9Also doing some refactoring of `CodeGenFunction::EmitBlockCallExpr()`
10
11Reviewers: Anastasia, yaxunl, svenvh
12
13Reviewed By: Anastasia
14
15Subscribers: cfe-commits
16
17Tags: #clang
18
19Differential Revision: https://reviews.llvm.org/D58388
20
21git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354568 91177308-0d34-0410-b5e6-96231b3b80d8
22
23Upstream-Status: Backport
24[https://github.com/llvm-mirror/clang/commit/eae71f8d05ce550c4e2595c9b7082cc2c7882c58]
25Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
26---
27 lib/CodeGen/CGBlocks.cpp | 77 +++++++++++++-------------
28 lib/CodeGen/CGOpenCLRuntime.cpp | 30 +++++++---
29 lib/CodeGen/CGOpenCLRuntime.h | 4 ++
30 test/CodeGenOpenCL/blocks.cl | 10 +---
31 test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 34 +++++++++---
32 5 files changed, 91 insertions(+), 64 deletions(-)
33
34diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
35index fa3c3ee..10a0238 100644
36--- a/lib/CodeGen/CGBlocks.cpp
37+++ b/lib/CodeGen/CGBlocks.cpp
38@@ -1261,52 +1261,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
39 ReturnValueSlot ReturnValue) {
40 const BlockPointerType *BPT =
41 E->getCallee()->getType()->getAs<BlockPointerType>();
42-
43 llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
44-
45- // Get a pointer to the generic block literal.
46- // For OpenCL we generate generic AS void ptr to be able to reuse the same
47- // block definition for blocks with captures generated as private AS local
48- // variables and without captures generated as global AS program scope
49- // variables.
50- unsigned AddrSpace = 0;
51- if (getLangOpts().OpenCL)
52- AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
53-
54- llvm::Type *BlockLiteralTy =
55- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
56-
57- // Bitcast the callee to a block literal.
58- BlockPtr =
59- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
60-
61- // Get the function pointer from the literal.
62- llvm::Value *FuncPtr =
63- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
64- CGM.getLangOpts().OpenCL ? 2 : 3);
65-
66- // Add the block literal.
67+ llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType();
68+ llvm::Value *Func = nullptr;
69+ QualType FnType = BPT->getPointeeType();
70+ ASTContext &Ctx = getContext();
71 CallArgList Args;
72
73- QualType VoidPtrQualTy = getContext().VoidPtrTy;
74- llvm::Type *GenericVoidPtrTy = VoidPtrTy;
75 if (getLangOpts().OpenCL) {
76- GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType();
77- VoidPtrQualTy =
78- getContext().getPointerType(getContext().getAddrSpaceQualType(
79- getContext().VoidTy, LangAS::opencl_generic));
80- }
81-
82- BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy);
83- Args.add(RValue::get(BlockPtr), VoidPtrQualTy);
84-
85- QualType FnType = BPT->getPointeeType();
86+ // For OpenCL, BlockPtr is already casted to generic block literal.
87+
88+ // First argument of a block call is a generic block literal casted to
89+ // generic void pointer, i.e. i8 addrspace(4)*
90+ llvm::Value *BlockDescriptor = Builder.CreatePointerCast(
91+ BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType());
92+ QualType VoidPtrQualTy = Ctx.getPointerType(
93+ Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic));
94+ Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy);
95+ // And the rest of the arguments.
96+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
97+
98+ // We *can* call the block directly unless it is a function argument.
99+ if (!isa<ParmVarDecl>(E->getCalleeDecl()))
100+ Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
101+ else {
102+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2);
103+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
104+ }
105+ } else {
106+ // Bitcast the block literal to a generic block literal.
107+ BlockPtr = Builder.CreatePointerCast(
108+ BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal");
109+ // Get pointer to the block invoke function
110+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3);
111
112- // And the rest of the arguments.
113- EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
114+ // First argument is a block literal casted to a void pointer
115+ BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy);
116+ Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy);
117+ // And the rest of the arguments.
118+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
119
120- // Load the function.
121- llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
122+ // Load the function.
123+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
124+ }
125
126 const FunctionType *FuncTy = FnType->castAs<FunctionType>();
127 const CGFunctionInfo &FnInfo =
128diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
129index 7f6f595..75003e5 100644
130--- a/lib/CodeGen/CGOpenCLRuntime.cpp
131+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
132@@ -123,6 +123,23 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
133 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
134 }
135
136+// Get the block literal from an expression derived from the block expression.
137+// OpenCL v2.0 s6.12.5:
138+// Block variable declarations are implicitly qualified with const. Therefore
139+// all block variables must be initialized at declaration time and may not be
140+// reassigned.
141+static const BlockExpr *getBlockExpr(const Expr *E) {
142+ const Expr *Prev = nullptr; // to make sure we do not stuck in infinite loop.
143+ while(!isa<BlockExpr>(E) && E != Prev) {
144+ Prev = E;
145+ E = E->IgnoreCasts();
146+ if (auto DR = dyn_cast<DeclRefExpr>(E)) {
147+ E = cast<VarDecl>(DR->getDecl())->getInit();
148+ }
149+ }
150+ return cast<BlockExpr>(E);
151+}
152+
153 /// Record emitted llvm invoke function and llvm block literal for the
154 /// corresponding block expression.
155 void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
156@@ -137,20 +154,17 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
157 EnqueuedBlockMap[E].Kernel = nullptr;
158 }
159
160+llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
161+ return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
162+}
163+
164 CGOpenCLRuntime::EnqueuedBlockInfo
165 CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
166 CGF.EmitScalarExpr(E);
167
168 // The block literal may be assigned to a const variable. Chasing down
169 // to get the block literal.
170- if (auto DR = dyn_cast<DeclRefExpr>(E)) {
171- E = cast<VarDecl>(DR->getDecl())->getInit();
172- }
173- E = E->IgnoreImplicit();
174- if (auto Cast = dyn_cast<CastExpr>(E)) {
175- E = Cast->getSubExpr();
176- }
177- auto *Block = cast<BlockExpr>(E);
178+ const BlockExpr *Block = getBlockExpr(E);
179
180 assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
181 "Block expression not emitted");
182diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h
183index 750721f..4effc7e 100644
184--- a/lib/CodeGen/CGOpenCLRuntime.h
185+++ b/lib/CodeGen/CGOpenCLRuntime.h
186@@ -92,6 +92,10 @@ public:
187 /// \param Block block literal emitted for the block expression.
188 void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
189 llvm::Value *Block);
190+
191+ /// \return LLVM block invoke function emitted for an expression derived from
192+ /// the block expression.
193+ llvm::Function *getInvokeFunction(const Expr *E);
194 };
195
196 }
197diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
198index 19aacc3..ab5a2c6 100644
199--- a/test/CodeGenOpenCL/blocks.cl
200+++ b/test/CodeGenOpenCL/blocks.cl
201@@ -39,11 +39,8 @@ void foo(){
202 // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
203 // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
204 // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
205- // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
206 // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
207- // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
208- // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
209- // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
210+ // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
211 // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
212 // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
213 // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
214@@ -53,11 +50,8 @@ void foo(){
215 // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
216 // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
217 // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
218- // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
219 // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
220- // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
221- // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)*
222- // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]])
223+ // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
224
225 int (^ block_B)(void) = ^{
226 return i;
227diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
228index 8445016..1566912 100644
229--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
230+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
231@@ -312,9 +312,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
232 };
233
234 // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
235- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
236- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
237- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
238+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
239 block_A();
240
241 // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
242@@ -333,15 +331,35 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
243 unsigned size = get_kernel_work_group_size(block_A);
244
245 // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
246- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
247- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
248- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
249+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
250 block_A();
251
252+ // Make sure that block invoke function is resolved correctly after sequence of assignements.
253+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
254+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
255+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
256+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
257+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1,
258+ bl_t b1 = block_G;
259+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
260+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
261+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
262+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
263+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2,
264+ bl_t b2 = b1;
265+ // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
266+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*)
267+ // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
268+ b2(0);
269+ // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
270+ // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
271+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
272+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
273+ size = get_kernel_preferred_work_group_size_multiple(b2);
274+
275 void (^block_C)(void) = ^{
276 callee(i, a);
277 };
278-
279 // Emits block literal on stack and block kernel [[INVLK3]].
280 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
281 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
282@@ -404,8 +422,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
283 // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
284 // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
285 // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
286+// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
287 // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
288 // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
289-// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
290 // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
291 // COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
292--
2931.8.3.1
294
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch
deleted file mode 100644
index 510c7c6e..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch
+++ /dev/null
@@ -1,61 +0,0 @@
1From 29e2813a2ab7d5569860bb07892dfef7b5374d96 Mon Sep 17 00:00:00 2001
2From: Yaxun Liu <Yaxun.Liu@amd.com>
3Date: Tue, 26 Feb 2019 16:20:41 +0000
4Subject: [PATCH] [OpenCL] Fix assertion due to blocks
5
6A recent change caused assertion in CodeGenFunction::EmitBlockCallExpr when a block is called.
7
8There is code
9
10 Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
11getCalleeDecl calls Expr::getReferencedDeclOfCallee, which does not handle
12BlockExpr and returns nullptr, which causes isa to assert.
13
14This patch fixes that.
15
16Differential Revision: https://reviews.llvm.org/D58658
17
18
19git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354893 91177308-0d34-0410-b5e6-96231b3b80d8
20
21Upstream-Status: Backport
22[https://github.com/llvm-mirror/clang/commit/29e2813a2ab7d5569860bb07892dfef7b5374d96]
23Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
24---
25 lib/AST/Expr.cpp | 2 ++
26 test/CodeGenOpenCL/blocks.cl | 6 ++++++
27 2 files changed, 8 insertions(+)
28
29diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
30index aef1eab..85690c7 100644
31--- a/lib/AST/Expr.cpp
32+++ b/lib/AST/Expr.cpp
33@@ -1358,6 +1358,8 @@ Decl *Expr::getReferencedDeclOfCallee() {
34 return DRE->getDecl();
35 if (MemberExpr *ME = dyn_cast<MemberExpr>(CEE))
36 return ME->getMemberDecl();
37+ if (auto *BE = dyn_cast<BlockExpr>(CEE))
38+ return BE->getBlockDecl();
39
40 return nullptr;
41 }
42diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
43index ab5a2c6..c3e2685 100644
44--- a/test/CodeGenOpenCL/blocks.cl
45+++ b/test/CodeGenOpenCL/blocks.cl
46@@ -90,6 +90,12 @@ int get42() {
47 return blockArgFunc(^{return 42;});
48 }
49
50+// COMMON-LABEL: define {{.*}}@call_block
51+// call {{.*}}@__call_block_block_invoke
52+int call_block() {
53+ return ^int(int num) { return num; } (11);
54+}
55+
56 // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size"
57 // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align"
58
59--
601.8.3.1
61
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
deleted file mode 100644
index f536f0f2..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
+++ /dev/null
@@ -1,16 +0,0 @@
1FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:"
2
3SRC_URI_append_intel-x86-common = " \
4 git://github.com/intel/opencl-clang.git;protocol=https;branch=ocl-open-80;destsuffix=git/llvm/projects/opencl-clang;name=opencl-clang \
5 git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_80;destsuffix=git/llvm/projects/llvm-spirv;name=spirv \
6 file://0001-point-to-correct-clang.patch;patchdir=llvm/projects/opencl-clang \
7 file://0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch;patchdir=clang \
8 file://0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch;patchdir=clang \
9 file://0003-OpenCL-Fix-assertion-due-to-blocks.patch;patchdir=clang \
10 file://0001-dont-export-targets-for-binaries.patch \
11 file://0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch;patchdir=llvm/projects/llvm-spirv \
12 "
13
14SRCREV_opencl-clang = "daf5e4dd718477ae8cf89a283c653939d9182f15"
15SRCREV_spirv = "bd0f28fb92061d49c0f120b4dac3fd8956006745"
16
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/allow-to-find-cpp-generation-tool.patch b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/allow-to-find-cpp-generation-tool.patch
new file mode 100644
index 00000000..265fcfa2
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/allow-to-find-cpp-generation-tool.patch
@@ -0,0 +1,51 @@
1From a6361d635e5f3046853883f3ac06fb175116933c Mon Sep 17 00:00:00 2001
2From: Dongwon Kim <dongwon.kim@intel.com>
3Date: Sat, 21 Aug 2021 16:09:39 -0700
4Subject: [PATCH] Build not able to locate cpp_generation_tool.
5
6Upstream-Status: Inappropriate [oe specific]
7
8Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
9Signed-off-by: Dongwon Kim <dongwon.kim@intel.com>
10---
11 shared/source/built_ins/kernels/CMakeLists.txt | 10 +++++-----
12 1 file changed, 5 insertions(+), 5 deletions(-)
13
14diff --git a/shared/source/built_ins/kernels/CMakeLists.txt b/shared/source/built_ins/kernels/CMakeLists.txt
15index ed85a37c52..f7c9e79137 100644
16--- a/shared/source/built_ins/kernels/CMakeLists.txt
17+++ b/shared/source/built_ins/kernels/CMakeLists.txt
18@@ -107,9 +107,9 @@ function(compile_builtin core_type platform_it builtin bits builtin_options mode
19 )
20 add_custom_command(
21 OUTPUT ${OUTPUT_FILE_CPP}
22- COMMAND $<TARGET_FILE:cpp_generate_tool> --file ${BINARY_OUTPUT}.bin --output ${OUTPUT_FILE_CPP} --array ${mode}_${BASENAME} --device ${RELEASE_FILENAME}
23+ COMMAND cpp_generate_tool --file ${BINARY_OUTPUT}.bin --output ${OUTPUT_FILE_CPP} --array ${mode}_${BASENAME} --device ${RELEASE_FILENAME}
24 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
25- DEPENDS ${OUTPUT_FILES_BINARIES} $<TARGET_FILE:cpp_generate_tool>
26+ DEPENDS ${OUTPUT_FILES_BINARIES} cpp_generate_tool
27 )
28 list(APPEND BUILTINS_COMMANDS "${OUTPUT_FILE_CPP}")
29 else()
30@@ -159,9 +159,9 @@ function(generate_cpp_spirv builtin)
31 )
32 add_custom_command(
33 OUTPUT ${OUTPUT_FILE_CPP}
34- COMMAND $<TARGET_FILE:cpp_generate_tool> --file ${GENERATED_SPV_INPUT} --output ${OUTPUT_FILE_CPP} --array ${BASENAME}
35+ COMMAND cpp_generate_tool --file ${GENERATED_SPV_INPUT} --output ${OUTPUT_FILE_CPP} --array ${BASENAME}
36 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
37- DEPENDS ${GENERATED_SPV_INPUT} $<TARGET_FILE:cpp_generate_tool>
38+ DEPENDS ${GENERATED_SPV_INPUT} cpp_generate_tool
39 )
40 set(OUTPUT_LIST_CPP_FILES ${OUTPUT_LIST_CPP_FILES} ${OUTPUT_FILE_CPP} PARENT_SCOPE)
41 else()
42@@ -277,4 +277,4 @@ if(NOT "${OUTPUT_LIST_CPP_FILES}" STREQUAL "")
43 )
44 endif()
45
46-apply_macro_for_each_core_type("SUPPORTED")
47\ No newline at end of file
48+apply_macro_for_each_core_type("SUPPORTED")
49--
502.43.2
51
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/disable-werror.patch b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/disable-werror.patch
new file mode 100644
index 00000000..20d9b847
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/disable-werror.patch
@@ -0,0 +1,16 @@
1Upstream-Status: Inappropriate
2Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
3
4diff --git a/CMakeLists.txt b/CMakeLists.txt
5index d52e089778..bc0cf35014 100644
6--- a/CMakeLists.txt
7+++ b/CMakeLists.txt
8@@ -727,7 +727,7 @@ if(NOT MSVC)
9 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-noexcept-type") # Added for gtest
10 endif()
11 endif()
12- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Werror=vla")
13+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=vla")
14
15 if(USE_SANITIZE_UB)
16 check_cxx_compiler_flag(-fsanitize=undefined COMPILER_SUPPORTS_UNDEFINED_BEHAVIOR_SANITIZER)
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/external-ocloc.patch b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/external-ocloc.patch
new file mode 100644
index 00000000..5f93b7b6
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/external-ocloc.patch
@@ -0,0 +1,40 @@
1From 1f58c22992ddea4167b01b44448528de427f50d5 Mon Sep 17 00:00:00 2001
2From: Dongwon Kim <dongwon.kim@intel.com>
3Date: Wed, 2 Mar 2022 15:52:45 -0800
4Subject: [PATCH] external ocloc
5
6Upstream-Status: Inappropriate
7
8Signed-off-by: Dongwon Kim <dongwon.kim@intel.com>
9---
10 cmake/ocloc_cmd_prefix.cmake | 14 ++++++++------
11 1 file changed, 8 insertions(+), 6 deletions(-)
12
13diff --git a/cmake/ocloc_cmd_prefix.cmake b/cmake/ocloc_cmd_prefix.cmake
14index 2b44330831..03067c9df0 100644
15--- a/cmake/ocloc_cmd_prefix.cmake
16+++ b/cmake/ocloc_cmd_prefix.cmake
17@@ -4,12 +4,14 @@
18 # SPDX-License-Identifier: MIT
19 #
20
21-if(WIN32)
22- set(ocloc_cmd_prefix ocloc)
23-else()
24- if(DEFINED NEO__IGC_LIBRARY_PATH)
25- set(ocloc_cmd_prefix ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${NEO__IGC_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib>" $<TARGET_FILE:ocloc>)
26+if(NOT DEFINED ocloc_cmd_prefix)
27+ if(WIN32)
28+ set(ocloc_cmd_prefix ocloc)
29 else()
30- set(ocloc_cmd_prefix ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib>" $<TARGET_FILE:ocloc>)
31+ if(DEFINED NEO__IGC_LIBRARY_PATH)
32+ set(ocloc_cmd_prefix LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${NEO__IGC_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib> $<TARGET_FILE:ocloc>)
33+ else()
34+ set(ocloc_cmd_prefix LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib> $<TARGET_FILE:ocloc>)
35+ endif()
36 endif()
37 endif()
38--
392.37.3
40
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/f10439aea214984a060566831f63d3aa198ef1b8.patch b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/f10439aea214984a060566831f63d3aa198ef1b8.patch
new file mode 100644
index 00000000..b7fcb3d1
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/f10439aea214984a060566831f63d3aa198ef1b8.patch
@@ -0,0 +1,54 @@
1From f10439aea214984a060566831f63d3aa198ef1b8 Mon Sep 17 00:00:00 2001
2From: Pawel Cieslak <pawel.cieslak@intel.com>
3Date: Tue, 14 May 2024 14:20:24 +0000
4Subject: [PATCH] fix: include <algorithm> where std::find is used
5
6Related-To: NEO-11375
7Signed-off-by: Pawel Cieslak <pawel.cieslak@intel.com>
8
9Upstream-Status: Backport [https://github.com/intel/compute-runtime/commit/f10439aea214984a060566831f63d3aa198ef1b8]
10Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
11---
12 .../sources/linux/nl_api/test_sysman_iaf_nl_api_prelim.cpp | 3 ++-
13 shared/test/unit_test/gmm_helper/gmm_resource_info_tests.cpp | 4 +++-
14 2 files changed, 5 insertions(+), 2 deletions(-)
15
16diff --git a/level_zero/sysman/test/unit_tests/sources/linux/nl_api/test_sysman_iaf_nl_api_prelim.cpp b/level_zero/sysman/test/unit_tests/sources/linux/nl_api/test_sysman_iaf_nl_api_prelim.cpp
17index 6ab1b751d866..8a224752c8fc 100644
18--- a/level_zero/sysman/test/unit_tests/sources/linux/nl_api/test_sysman_iaf_nl_api_prelim.cpp
19+++ b/level_zero/sysman/test/unit_tests/sources/linux/nl_api/test_sysman_iaf_nl_api_prelim.cpp
20@@ -1,5 +1,5 @@
21 /*
22- * Copyright (C) 2021-2023 Intel Corporation
23+ * Copyright (C) 2021-2024 Intel Corporation
24 *
25 * SPDX-License-Identifier: MIT
26 *
27@@ -13,6 +13,7 @@
28
29 #include "gtest/gtest.h"
30
31+#include <algorithm>
32 #include <limits>
33 #include <netlink/handlers.h>
34
35diff --git a/shared/test/unit_test/gmm_helper/gmm_resource_info_tests.cpp b/shared/test/unit_test/gmm_helper/gmm_resource_info_tests.cpp
36index 206c272c5bb2..e961248e73a3 100644
37--- a/shared/test/unit_test/gmm_helper/gmm_resource_info_tests.cpp
38+++ b/shared/test/unit_test/gmm_helper/gmm_resource_info_tests.cpp
39@@ -1,5 +1,5 @@
40 /*
41- * Copyright (C) 2021-2023 Intel Corporation
42+ * Copyright (C) 2021-2024 Intel Corporation
43 *
44 * SPDX-License-Identifier: MIT
45 *
46@@ -17,6 +17,8 @@
47
48 #include "gtest/gtest.h"
49
50+#include <algorithm>
51+
52 using namespace NEO;
53
54 struct MockGmmHandleAllocator : NEO::GmmHandleAllocator {
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime_24.13.29138.7.bb b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime_24.13.29138.7.bb
new file mode 100644
index 00000000..7280ed03
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime_24.13.29138.7.bb
@@ -0,0 +1,65 @@
1SUMMARY = "The Intel(R) Graphics Compute Runtime for OpenCL(TM)"
2DESCRIPTION = "The Intel(R) Graphics Compute Runtime for OpenCL(TM) \
3is an open source project to converge Intel's development efforts \
4on OpenCL(TM) compute stacks supporting the GEN graphics hardware \
5architecture."
6
7LICENSE = "MIT & Apache-2.0"
8LIC_FILES_CHKSUM = "file://LICENSE.md;md5=eca6ec6997e18db166db7109cdbe611c \
9 file://third_party/opencl_headers/LICENSE;md5=3b83ef96387f14655fc854ddc3c6bd57"
10
11SRC_URI = "git://github.com/intel/compute-runtime.git;protocol=https;branch=releases/24.13 \
12 file://disable-werror.patch \
13 file://allow-to-find-cpp-generation-tool.patch \
14 file://external-ocloc.patch \
15 file://f10439aea214984a060566831f63d3aa198ef1b8.patch \
16 "
17
18SRCREV = "7131387cdbb02d480a225c70daef913a6c024a6e"
19
20S = "${WORKDIR}/git"
21
22DEPENDS += " intel-graphics-compiler gmmlib libva qemu-native"
23
24RDEPENDS:${PN} += " intel-graphics-compiler gmmlib"
25
26inherit cmake pkgconfig qemu
27
28COMPATIBLE_HOST = '(x86_64).*-linux'
29COMPATIBLE_HOST:libc-musl = "null"
30
31EXTRA_OECMAKE = " \
32 -DIGC_DIR=${STAGING_INCDIR}/igc \
33 -DBUILD_TYPE=Release \
34 -DSKIP_UNIT_TESTS=1 \
35 -DCCACHE_ALLOWED=FALSE \
36 -DNEO_DISABLE_LD_LLD=ON \
37 -DNEO_DISABLE_LD_GOLD=ON \
38 "
39
40EXTRA_OECMAKE:append:class-target = " \
41 -Docloc_cmd_prefix=ocloc \
42 -DCMAKE_CROSSCOMPILING_EMULATOR=${WORKDIR}/qemuwrapper \
43 "
44
45PACKAGECONFIG ??= ""
46PACKAGECONFIG[levelzero] = "-DBUILD_WITH_L0=ON, -DBUILD_WITH_L0=OFF, level-zero"
47
48do_configure:prepend:class-target () {
49 # Write an executable qemuwrapper script into WORKDIR for cmake to use; qemu_binary is expanded while the script is written, whereas the escaped "$@" is expanded only when the wrapper itself runs.
50 qemu_binary="${@qemu_wrapper_cmdline(d, d.getVar('STAGING_DIR_HOST'), [d.expand('${B}/bin'),d.expand('${STAGING_DIR_HOST}${libdir}'),d.expand('${STAGING_DIR_HOST}${base_libdir}')])}"
51 cat > ${WORKDIR}/qemuwrapper << EOF
52#!/bin/sh
53$qemu_binary "\$@"
54EOF
55 chmod +x ${WORKDIR}/qemuwrapper
56}
57
58FILES:${PN} += " \
59 ${libdir}/intel-opencl/libigdrcl.so \
60 ${libdir}/libocloc.so \
61 "
62
63FILES:${PN}-dev = "${includedir}"
64
65UPSTREAM_CHECK_GITTAGREGEX = "(?P<pver>\d+(\.\d+)+)"
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch
new file mode 100644
index 00000000..377081fd
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch
@@ -0,0 +1,35 @@
1From 3d71670f8ad5b54d434c2f5f71713bb1d5433ae4 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Tue, 12 Oct 2021 23:46:42 +0800
4Subject: [PATCH] BiF/CMakeLists.txt: remove opt from DEPENDS
5
6Otherwise it starts failing with:
7
8| ninja: error: 'IGC/VectorCompiler/lib/BiF/opt', needed by 'IGC/VectorCompiler/lib/BiF/VCBiFPrintfOCL32.opt.bc', missing and no known rule to make it
9
10We don't need to explicitly make sure opt is built when
11using prebuilt binaries.
12
13Upstream-Status: Inappropriate
14
15Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
16---
17 IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake | 2 +-
18 1 file changed, 1 insertion(+), 1 deletion(-)
19
20diff --git a/IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake b/IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake
21index d20d7f887..882e09fea 100644
22--- a/IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake
23+++ b/IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake
24@@ -109,7 +109,7 @@ function(vc_build_bif TARGET RES_FILE CMCL_SRC_PATH BIF_NAME PTR_BIT_SIZE)
25 COMMENT "vc_build_bif: Translating CMCL builtins: ${BIF_CLANG_BC_NAME_FINAL} -> ${BIF_OPT_BC_NAME}"
26 COMMAND CMCLTranslatorTool -o ${BIF_CMCL_BC_NAME} ${BIF_CLANG_BC_NAME_FINAL}
27 COMMAND ${LLVM_OPT_EXE} ${IGC_LLVM_DEPENDENT_OPT_FLAGS} --O2 -o ${BIF_OPT_BC_NAME} ${BIF_CMCL_BC_NAME}
28- DEPENDS CMCLTranslatorTool ${LLVM_OPT_EXE} ${BIF_CLANG_BC_PATH_FINAL}
29+ DEPENDS CMCLTranslatorTool ${BIF_CLANG_BC_PATH_FINAL}
30 BYPRODUCTS ${BIF_OPT_BC_PATH}
31 SOURCES ${CMCL_SRC_PATH})
32 set(${RES_FILE} ${BIF_OPT_BC_NAME} PARENT_SCOPE)
33--
342.43.2
35
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch
new file mode 100644
index 00000000..dca75e22
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch
@@ -0,0 +1,31 @@
1From e69a3181482e5f442756a61c7b683914072884f1 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Mon, 9 Jan 2023 11:43:05 +0800
4Subject: [PATCH] external/SPIRV-Tools: change path to tools and headers
5
6We clone the SPIRV headers and tools in a different directory to ensure
7file path substitutions take place.
8
9Upstream-Status: Inappropriate
10
11Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
12
13---
14 external/SPIRV-Tools/CMakeLists.txt | 4 ++--
15 1 file changed, 2 insertions(+), 2 deletions(-)
16
17diff --git a/external/SPIRV-Tools/CMakeLists.txt b/external/SPIRV-Tools/CMakeLists.txt
18index 9afa5746c..7ca24d5dc 100644
19--- a/external/SPIRV-Tools/CMakeLists.txt
20+++ b/external/SPIRV-Tools/CMakeLists.txt
21@@ -43,8 +43,8 @@ else() #By default use build from sources
22 message(STATUS "[SPIRV-Tools] : Building from source")
23 message(STATUS "[SPIRV-Tools] : Current source dir: ${CMAKE_CURRENT_SOURCE_DIR}")
24
25- set(SPIRV-Headers_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../SPIRV-Headers") # used in subdirectory
26- set(SPIRV-Tools_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../SPIRV-Tools")
27+ set(SPIRV-Headers_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../SPIRV-Headers") # used in subdirectory
28+ set(SPIRV-Tools_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../SPIRV-Tools")
29
30 set(SPIRV-Tools_OUTPUT_DIR "${IGC_OPTION__OUTPUT_DIR}/external/SPIRV-Tools/build")
31 set(IGC_BUILD__SPIRV-Headers_DIR "${SPIRV-Headers_SOURCE_DIR}")
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-fix-tblgen.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-fix-tblgen.patch
new file mode 100644
index 00000000..39443931
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-fix-tblgen.patch
@@ -0,0 +1,24 @@
1From 5648568e597acd0fed82aac3e6aef0f95a1b78d1 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Thu, 19 May 2022 22:50:09 +0800
4Subject: [PATCH] fix tblgen
5
6Upstream-Status: Inappropriate [OE specific]
7
8---
9 IGC/cmake/igc_llvm.cmake | 2 +-
10 1 file changed, 1 insertion(+), 1 deletion(-)
11
12diff --git a/IGC/cmake/igc_llvm.cmake b/IGC/cmake/igc_llvm.cmake
13index 10322533c..9020cb3c8 100644
14--- a/IGC/cmake/igc_llvm.cmake
15+++ b/IGC/cmake/igc_llvm.cmake
16@@ -52,7 +52,7 @@ else()
17 set(LLVM_OPT_EXE "opt" CACHE STRING "")
18
19 set(LLVM_TABLEGEN_EXE "llvm-tblgen")
20- if(CMAKE_CROSSCOMPILING)
21+ if(TRUE)
22 if(DEFINED LLVM_TABLEGEN)
23 set(LLVM_TABLEGEN_EXE ${LLVM_TABLEGEN})
24 else()
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0003-Improve-Reproducibility-for-src-package.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0003-Improve-Reproducibility-for-src-package.patch
new file mode 100644
index 00000000..650130a8
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0003-Improve-Reproducibility-for-src-package.patch
@@ -0,0 +1,34 @@
1From 0559332abd04b6c8bc70171d201f43d2e4735336 Mon Sep 17 00:00:00 2001
2From: Lee Chee Yang <chee.yang.lee@intel.com>
3Date: Wed, 2 Sep 2020 08:28:35 +0800
4Subject: [PATCH] Improve Reproducibility for src package
5
6Improve reproducibility for the intel-graphics-compiler-src package.
7This needs the build path to be passed as an environment variable to the build.
8It only works with bison 3.7 onward, hence check the bison version
9before adding the flags.
10Upstream-Status: Inappropriate [applying --file-prefix-map in such way does not work for upstream]
11Signed-off-by: Lee Chee Yang <chee.yang.lee@intel.com>
12
13---
14 visa/CMakeLists.txt | 7 +++++--
15 1 file changed, 5 insertions(+), 2 deletions(-)
16
17diff --git a/visa/CMakeLists.txt b/visa/CMakeLists.txt
18index 6be467587..930c386a6 100644
19--- a/visa/CMakeLists.txt
20+++ b/visa/CMakeLists.txt
21@@ -123,8 +123,11 @@ endif()
22 set(bison_output_file ${CMAKE_CURRENT_BINARY_DIR}/CISA.tab.cpp)
23 set(flex_output_file ${CMAKE_CURRENT_BINARY_DIR}/lex.CISA.cpp)
24
25-BISON_TARGET(CISAParser CISA.y ${bison_output_file} COMPILE_FLAGS "-vt -p CISA")
26-FLEX_TARGET(CISAScanner CISA.l ${flex_output_file} COMPILE_FLAGS "-PCISA ${WIN_FLEX_FLAG}")
27+if(BISON_VERSION VERSION_GREATER_EQUAL "3.7.0")
28+ set(BISON_EXTRA_FLAGS " --file-prefix-map=$ENV{B}=/igc/ ")
29+endif()
30+BISON_TARGET(CISAParser CISA.y ${bison_output_file} COMPILE_FLAGS "-l -vt -p CISA ${BISON_EXTRA_FLAGS} ")
31+FLEX_TARGET(CISAScanner CISA.l ${flex_output_file} COMPILE_FLAGS "-PCISA -L ${WIN_FLEX_FLAG} ")
32 ADD_FLEX_BISON_DEPENDENCY(CISAScanner CISAParser)
33 set(CISAScanner_dependencies)
34
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_1.0.16510.2.bb b/dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_1.0.16510.2.bb
new file mode 100644
index 00000000..24eb97bd
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_1.0.16510.2.bb
@@ -0,0 +1,75 @@
1SUMMARY = "The Intel(R) Graphics Compiler for OpenCL(TM)"
2DESCRIPTION = "The Intel(R) Graphics Compiler for OpenCL(TM) is an \
3llvm based compiler for OpenCL(TM) targeting Intel Gen graphics \
4hardware architecture."
5
6LICENSE = "MIT & Apache-2.0"
7LIC_FILES_CHKSUM = "file://IGC/BiFModule/Implementation/ExternalLibraries/libclc/LICENSE.TXT;md5=311cfc1a5b54bab8ed34a0b5fba4373e \
8 file://LICENSE.md;md5=488d74376edf2765f6e78d271543dde3 \
9 file://NOTICES.txt;md5=b81a52411c84df3419f20bad4d755880"
10
11SRC_URI = "git://github.com/intel/intel-graphics-compiler.git;protocol=https;name=igc;branch=releases/igc-1.0.16510 \
12 git://github.com/intel/vc-intrinsics.git;protocol=https;destsuffix=git/vc-intrinsics;name=vc;nobranch=1 \
13 git://github.com/KhronosGroup/SPIRV-Tools.git;protocol=https;destsuffix=git/SPIRV-Tools;name=spirv-tools;branch=main \
14 git://github.com/KhronosGroup/SPIRV-Headers.git;protocol=https;destsuffix=git/SPIRV-Headers;name=spirv-headers;branch=main \
15 file://0003-Improve-Reproducibility-for-src-package.patch \
16 file://0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch \
17 file://0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch \
18 "
19
20SRC_URI:append:class-native = " file://0001-fix-tblgen.patch"
21
22SRCREV_igc = "c2495d45f37fadd963ad22eb0cc1a8235a306775"
23SRCREV_vc = "f9c34404d0ea9abad83875a10bd48d88cea90ebd"
24SRCREV_spirv-tools = "f0cc85efdbbe3a46eae90e0f915dc1509836d0fc"
25SRCREV_spirv-headers = "1c6bb2743599e6eb6f37b2969acc0aef812e32e3"
26
27SRCREV_FORMAT = "igc_vc_spirv-tools_spirv-headers"
28
29# Export B into the build environment: the reproducibility patch reads it (as ENV{B}) to map the build path to a fixed prefix in bison output
30export B
31
32S = "${WORKDIR}/git"
33
34inherit cmake pkgconfig qemu python3native
35
36CXXFLAGS:append = " -Wno-error=nonnull"
37
38COMPATIBLE_HOST = '(x86_64).*-linux'
39COMPATIBLE_HOST:libc-musl = "null"
40
41DEPENDS += " flex-native bison-native clang clang-cross-x86_64 opencl-clang qemu-native python3-mako-native"
42
43RDEPENDS:${PN} += "opencl-clang"
44
45PACKAGECONFIG ??= "vc"
46PACKAGECONFIG[vc] = "-DIGC_BUILD__VC_ENABLED=ON -DIGC_OPTION__LINK_KHRONOS_SPIRV_TRANSLATOR=ON -DIGC_OPTION__SPIRV_TRANSLATOR_MODE=Prebuilds,-DIGC_BUILD__VC_ENABLED=OFF,"
47
48EXTRA_OECMAKE = " \
49 -DIGC_OPTION__LLVM_PREFERRED_VERSION=${LLVMVERSION} \
50 -DVC_INTRINSICS_SRC="${S}/vc-intrinsics" \
51 -DIGC_OPTION__LLVM_MODE=Prebuilds \
52 -DLLVM_TABLEGEN=${STAGING_BINDIR_NATIVE}/llvm-tblgen \
53 -DLLVM_LINK_EXE=${STAGING_BINDIR_NATIVE}/llvm-link \
54 -DCLANG_EXE=${STAGING_BINDIR_NATIVE}/clang \
55 -DCMAKE_CROSSCOMPILING_EMULATOR=${WORKDIR}/qemuwrapper \
56 "
57
58do_configure:prepend:class-target () {
59 # Write an executable qemuwrapper script into WORKDIR for cmake to use; qemu_binary is expanded while the script is written, whereas the escaped "$@" is expanded only when the wrapper itself runs.
60 qemu_binary="${@qemu_wrapper_cmdline(d, d.getVar('STAGING_DIR_HOST'), [d.expand('${STAGING_DIR_HOST}${libdir}'),d.expand('${STAGING_DIR_HOST}${base_libdir}')])}"
61 cat > ${WORKDIR}/qemuwrapper << EOF
62#!/bin/sh
63$qemu_binary "\$@"
64EOF
65 chmod +x ${WORKDIR}/qemuwrapper
66}
67
68UPSTREAM_CHECK_GITTAGREGEX = "^igc-(?P<pver>(?!19\..*)\d+(\.\d+)+)$"
69
70FILES:${PN} += " \
71 ${libdir}/igc/NOTICES.txt \
72 "
73
74# libigc.so contains build paths, so skip the "buildpaths" QA check for this package
75INSANE_SKIP:${PN} += "buildpaths"
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch
new file mode 100644
index 00000000..031a77c7
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch
@@ -0,0 +1,49 @@
1From 5aea653e611b59c70e529a1bd71885a509831557 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Tue, 1 Aug 2023 11:15:31 +0800
4Subject: [PATCH] cl_headers/CMakeLists.txt: use clang from native sysroot
5
6Allow clang to be found in target sysroot for target builds and don't try
7to compile cross binaries; we do that ourselves.
8
9Upstream-Status: Inappropriate [oe-specific]
10Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
11---
12 CMakeLists.txt | 8 ++++----
13 cl_headers/CMakeLists.txt | 2 +-
14 2 files changed, 5 insertions(+), 5 deletions(-)
15
16diff --git a/CMakeLists.txt b/CMakeLists.txt
17index 5864009..60ba39e 100644
18--- a/CMakeLists.txt
19+++ b/CMakeLists.txt
20@@ -35,10 +35,10 @@ set(CMAKE_MODULE_PATH
21
22 include(CMakeFunctions)
23
24-if(CMAKE_CROSSCOMPILING AND OPENCL_CLANG_BUILD_EXTERNAL)
25- include(CrossCompile)
26- llvm_create_cross_target(${PROJECT_NAME} NATIVE "" Release)
27-endif()
28+#if(CMAKE_CROSSCOMPILING AND OPENCL_CLANG_BUILD_EXTERNAL)
29+# include(CrossCompile)
30+# llvm_create_cross_target(${PROJECT_NAME} NATIVE "" Release)
31+#endif()
32
33 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
34 set(USE_PREBUILT_LLVM ON)
35diff --git a/cl_headers/CMakeLists.txt b/cl_headers/CMakeLists.txt
36index 16cabb7..4423536 100644
37--- a/cl_headers/CMakeLists.txt
38+++ b/cl_headers/CMakeLists.txt
39@@ -1,6 +1,6 @@
40 set(CL_HEADERS_LIB cl_headers)
41 if(USE_PREBUILT_LLVM)
42- find_program(CLANG_COMMAND clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
43+ find_program(CLANG_COMMAND clang PATHS ${LLVM_TOOLS_BINARY_DIR})
44 else()
45 set(CLANG_COMMAND $<TARGET_FILE:clang>)
46 endif()
47--
482.37.3
49
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-Request-native-clang-only-when-cross-compiling-464.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-Request-native-clang-only-when-cross-compiling-464.patch
new file mode 100644
index 00000000..2f1814f8
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-Request-native-clang-only-when-cross-compiling-464.patch
@@ -0,0 +1,60 @@
1From 43c806ef321b1f677a49d28c89fb7ffecf539c2d Mon Sep 17 00:00:00 2001
2From: Tim Creech <timothy.m.creech@intel.com>
3Date: Wed, 28 Jun 2023 03:45:51 -0400
4Subject: [PATCH 2/2] Request native clang only when cross-compiling (#464)
5
6* Request native clang only when cross-compiling
7
8LLVM_USE_HOST_TOOLS may be set if LLVM is configured with
9LLVM_OPTIMIZED_TABLEGEN, which does not necessarily indicate
10cross-compilation or that clang will only execute on the target.
11
12By checking that CMAKE_CROSSCOMPILING is set, we ensure that we only
13build/use clang again if necessary for host execution.
14
15* fixup: CMAKE_CROSSCOMPILING implies LLVM_USE_HOST_TOOLS
16
17Co-authored-by: Wenju He <wenju.he@intel.com>
18
19* fixup: also use CMAKE_CROSSCOMPILING in top-level CMakeLists.txt
20
21---------
22
23Co-authored-by: Wenju He <wenju.he@intel.com>
24
25Upstream-Status: Backport [https://github.com/intel/opencl-clang/commit/53843eee13cfb2357919ee02714a43bef1af0f86]
26Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
27---
28 CMakeLists.txt | 2 +-
29 cl_headers/CMakeLists.txt | 2 +-
30 2 files changed, 2 insertions(+), 2 deletions(-)
31
32diff --git a/CMakeLists.txt b/CMakeLists.txt
33index e772de9..5864009 100644
34--- a/CMakeLists.txt
35+++ b/CMakeLists.txt
36@@ -35,7 +35,7 @@ set(CMAKE_MODULE_PATH
37
38 include(CMakeFunctions)
39
40-if(LLVM_USE_HOST_TOOLS AND OPENCL_CLANG_BUILD_EXTERNAL)
41+if(CMAKE_CROSSCOMPILING AND OPENCL_CLANG_BUILD_EXTERNAL)
42 include(CrossCompile)
43 llvm_create_cross_target(${PROJECT_NAME} NATIVE "" Release)
44 endif()
45diff --git a/cl_headers/CMakeLists.txt b/cl_headers/CMakeLists.txt
46index 18296c2..16cabb7 100644
47--- a/cl_headers/CMakeLists.txt
48+++ b/cl_headers/CMakeLists.txt
49@@ -4,7 +4,7 @@ if(USE_PREBUILT_LLVM)
50 else()
51 set(CLANG_COMMAND $<TARGET_FILE:clang>)
52 endif()
53-if(LLVM_USE_HOST_TOOLS AND NOT OPENCL_CLANG_BUILD_EXTERNAL)
54+if(CMAKE_CROSSCOMPILING AND NOT OPENCL_CLANG_BUILD_EXTERNAL)
55 build_native_tool(clang CLANG_COMMAND)
56 endif()
57
58--
592.37.3
60
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang.inc b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang.inc
new file mode 100644
index 00000000..31a3fb21
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang.inc
@@ -0,0 +1,34 @@
1SUMMARY = "Common clang is a thin wrapper library around clang"
2DESCRIPTION = "Common clang has OpenCL-oriented API and is capable \
3 to compile OpenCL C kernels to SPIR-V modules."
4
5LICENSE = "NCSA"
6LIC_FILES_CHKSUM = "file://LICENSE;md5=e8a15bf1416762a09ece07e44c79118c"
7
8SRC_URI = "git://github.com/intel/opencl-clang.git;branch=${BRANCH};protocol=https \
9 file://0002-Request-native-clang-only-when-cross-compiling-464.patch \
10 file://0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch \
11 "
12S = "${WORKDIR}/git"
13
14inherit cmake
15DEPENDS += "clang"
16DEPENDS:append:class-target = " opencl-clang-native"
17
18COMPATIBLE_HOST = '(x86_64).*-linux'
19COMPATIBLE_HOST:libc-musl = "null"
20
21DEPENDS += " spirv-llvm-translator"
22
23EXTRA_OECMAKE += "\
24 -DLLVM_TABLEGEN_EXE=${STAGING_BINDIR_NATIVE}/llvm-tblgen \
25 -DCMAKE_SKIP_RPATH=TRUE \
26 -DPREFERRED_LLVM_VERSION=${LLVMVERSION} \
27 "
28
29do_install:append:class-native() {
30 install -d ${D}${bindir}
31 install -m 0755 ${B}/bin/linux_resource_linker ${D}${bindir}/
32}
33
34BBCLASSEXTEND = "native nativesdk"
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_15.0.0.bb b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_15.0.0.bb
new file mode 100644
index 00000000..e946c31c
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_15.0.0.bb
@@ -0,0 +1,5 @@
1require opencl-clang.inc
2
3SRCREV = "60fd799cc58755c16d951f9ebfde6d0f9b8554dd"
4
5BRANCH = "ocl-open-150"