summaryrefslogtreecommitdiffstats
path: root/dynamic-layers
diff options
context:
space:
mode:
Diffstat (limited to 'dynamic-layers')
-rwxr-xr-xdynamic-layers/clang-layer/recipes-core/dnn/files/run-ptest15
-rw-r--r--dynamic-layers/clang-layer/recipes-core/dnn/onednn_3.5.3.bb56
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend13
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch156
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch1119
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch66
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch59
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch294
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch61
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend16
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/0002-Build-not-able-to-locate-cpp_generation_tool.patch41
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/0003-external-ocloc.patch38
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime_25.13.33276.16.bb61
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch32
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-Build-not-able-to-locate-BiFManager-bin.patch27
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch30
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-fix-tblgen.patch23
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/igc/files/0003-Improve-Reproducibility-for-src-package.patch33
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_2.10.10.bb77
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch49
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-Request-native-clang-only-when-cross-compiling-464.patch60
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang.inc33
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_15.0.0.bb5
-rw-r--r--dynamic-layers/openembedded-layer/recipes-bsp/amt/files/0001-LMS-fix-build-issue-with-gcc-15.patch32
-rw-r--r--dynamic-layers/openembedded-layer/recipes-bsp/amt/files/lms_drop_rpath_1921.0.0.0.diff31
-rw-r--r--dynamic-layers/openembedded-layer/recipes-bsp/amt/lms_1921.0.0.0.bb39
-rw-r--r--dynamic-layers/openembedded-layer/recipes-bsp/amt/lms_2406.0.0.0.bb43
-rw-r--r--dynamic-layers/openembedded-layer/recipes-bsp/thermald/thermald_2.5.8.bb43
-rw-r--r--dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/0001-Add-print-function-to-print-test-run-status-in-ptest.patch53
-rw-r--r--dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/0001-Fix-QA-Issues.patch36
-rw-r--r--dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/0002-cmake-don-t-build-for-32-bit-targets.patch52
-rw-r--r--dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/run-ptest2
-rw-r--r--dynamic-layers/openembedded-layer/recipes-core/ispc/ispc_1.24.0.bb62
-rw-r--r--dynamic-layers/openembedded-layer/recipes-core/levelzero/level-zero_1.21.1.bb32
-rw-r--r--dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver/0001-Fix-the-compilation-warning-when-using-gcc-13-25.patch99
-rw-r--r--dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver/0001-linux-npu-driver-fix-multilib-install-issue.patch28
-rw-r--r--dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver/0002-Fix-compilation-failure-with-GCC-14.patch110
-rw-r--r--dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver_1.17.0.bb43
-rw-r--r--dynamic-layers/openembedded-layer/recipes-oneapi/dpcpp-compiler/intel-oneapi-dpcpp-cpp-runtime_2024.0.0-49819.bb54
-rw-r--r--dynamic-layers/openembedded-layer/recipes-oneapi/dpcpp-compiler/intel-oneapi-dpcpp-cpp_2024.0.0-49819.bb45
-rw-r--r--dynamic-layers/openembedded-layer/recipes-oneapi/embree/embree_4.3.3.bb30
-rw-r--r--dynamic-layers/openembedded-layer/recipes-oneapi/ipp/intel-oneapi-ipp_2021.10.0-653.bb35
-rw-r--r--dynamic-layers/openembedded-layer/recipes-oneapi/mkl/intel-oneapi-mkl_2024.0.0-49656.bb74
-rw-r--r--dynamic-layers/openembedded-layer/recipes-oneapi/oidn/oidn_2.1.0.bb27
-rw-r--r--dynamic-layers/openembedded-layer/recipes-oneapi/openvkl/openvkl_2.0.1.bb40
-rw-r--r--dynamic-layers/openembedded-layer/recipes-oneapi/ospray/ospray/0001-Fix-GCC11-Compile-Error-in-benchmark_register.h.patch28
-rw-r--r--dynamic-layers/openembedded-layer/recipes-oneapi/ospray/ospray_3.2.0.bb36
-rw-r--r--dynamic-layers/openembedded-layer/recipes-oneapi/rkcommon/rkcommon_1.14.0.bb17
-rw-r--r--dynamic-layers/openembedded-layer/recipes-support/ipmctl/ipmctl/0001-CMakeLists-disable-Werror.patch38
-rw-r--r--dynamic-layers/openembedded-layer/recipes-support/ipmctl/ipmctl/0001-Ignore-STATIC_ASSERTs-and-NULL-define-for-os-and-ut-builds.patch62
-rw-r--r--dynamic-layers/openembedded-layer/recipes-support/ipmctl/ipmctl_03.00.00.0499.bb45
51 files changed, 1746 insertions, 1854 deletions
diff --git a/dynamic-layers/clang-layer/recipes-core/dnn/files/run-ptest b/dynamic-layers/clang-layer/recipes-core/dnn/files/run-ptest
new file mode 100755
index 00000000..82d4df58
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-core/dnn/files/run-ptest
@@ -0,0 +1,15 @@
1#!/bin/sh
2
3cd tests
4./api-c
5if [ $? -eq 0 ]; then
6 echo 'PASS: api-c'
7else
8 echo 'FAIL: api-c'
9fi
10./test_c_symbols-c
11if [ $? -eq 0 ]; then
12 echo 'PASS: test_c_symbols-c'
13else
14 echo 'FAIL: test_c_symbols-c'
15fi
diff --git a/dynamic-layers/clang-layer/recipes-core/dnn/onednn_3.5.3.bb b/dynamic-layers/clang-layer/recipes-core/dnn/onednn_3.5.3.bb
new file mode 100644
index 00000000..6d4f69a3
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-core/dnn/onednn_3.5.3.bb
@@ -0,0 +1,56 @@
1HOMEPAGE = "https://www.oneapi.com"
2SUMMARY = "Deep Neural Network Library"
3DESCRIPTION = "This software is a user mode library that accelerates\
4deep-learning applications and frameworks on Intel architecture."
5LICENSE = "Apache-2.0 & BSD-3-Clause & BSL-1.0"
6LIC_FILES_CHKSUM = "file://LICENSE;md5=3b64000f6e7d52516017622a37a94ce9 \
7 file://tests/gtests/gtest/LICENSE;md5=cbbd27594afd089daa160d3a16dd515a \
8 file://src/cpu/x64/xbyak/COPYRIGHT;md5=3b9bf048d063d54cdb28964db558bcc7 \
9 file://src/common/ittnotify/LICENSE.BSD;md5=e671ff178b24a95a382ba670503c66fb \
10 "
11SECTION = "lib"
12
13inherit pkgconfig cmake ptest
14
15DNN_BRANCH = "rls-v${@'.'.join(d.getVar('PV').split('.')[0:2])}"
16
17SRCREV = "66f0cb9eb66affd2da3bf5f8d897376f04aae6af"
18SRC_URI = "git://github.com/oneapi-src/oneDNN.git;branch=${DNN_BRANCH};protocol=https \
19 file://run-ptest \
20 "
21
22UPSTREAM_CHECK_GITTAGREGEX = "^v(?P<pver>(\d+(\.\d+)+))$"
23
24CVE_PRODUCT = "intel:math_kernel_library"
25
26COMPATIBLE_HOST = '(x86_64).*-linux'
27COMPATIBLE_HOST:libc-musl = 'null'
28
29EXTRA_OECMAKE += " \
30 -DDNNL_LIBRARY_TYPE=SHARED \
31 -DDNNL_BUILD_EXAMPLES=ON \
32 -DDNNL_BUILD_TESTS=ON \
33 -DDNNL_CPU_RUNTIME=OMP \
34 -DDNNL_ARCH_OPT_FLAGS="" \
35 -DCMAKE_SKIP_RPATH=ON \
36 -DONEDNN_BUILD_GRAPH=OFF \
37 "
38
39PACKAGECONFIG ??= "gpu"
40PACKAGECONFIG[gpu] = "-DDNNL_GPU_RUNTIME=OCL, , opencl-headers virtual/opencl-icd, intel-compute-runtime"
41
42do_install:append () {
43 install -d ${D}${bindir}/mkl-dnn/tests/benchdnn/inputs
44 install -m 0755 ${B}/tests/benchdnn/benchdnn ${D}${bindir}/mkl-dnn/tests/benchdnn
45 cp -r ${B}/tests/benchdnn/inputs/* ${D}${bindir}/mkl-dnn/tests/benchdnn/inputs
46}
47
48do_install_ptest () {
49 install -d ${D}${PTEST_PATH}/tests
50 install -m 0755 ${B}/tests/api-c ${D}${PTEST_PATH}/tests
51 install -m 0755 ${B}/tests/test_c_symbols-c ${D}${PTEST_PATH}/tests
52}
53
54PACKAGES =+ "${PN}-test"
55
56FILES:${PN}-test = "${bindir}/mkl-dnn/*"
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend
deleted file mode 100644
index f8d5a252..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend
+++ /dev/null
@@ -1,13 +0,0 @@
1FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:"
2
3DEPENDS_append = " opencl-clang-native"
4LLVM_TARGETS_TO_BUILD = "X86"
5
6do_install_append_intel-x86-common() {
7 DESTDIR=${D} ninja -v install-cmake-exports
8}
9
10LIBCPLUSPLUS = ""
11
12# undefined reference to `__atomic_load' on i*86.
13COMPATIBLE_HOST = '(x86_64).*-linux'
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch
deleted file mode 100644
index 1c491402..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch
+++ /dev/null
@@ -1,156 +0,0 @@
1From 39a3ac0065c23d1e2d55dfd8792cc28a146a4307 Mon Sep 17 00:00:00 2001
2From: Alexey Bader <alexey.bader@intel.com>
3Date: Tue, 19 Feb 2019 15:19:06 +0000
4Subject: [PATCH 1/2] [OpenCL] Change type of block pointer for OpenCL
5
6Summary:
7
8For some reason OpenCL blocks in LLVM IR are represented as function pointers.
9These pointers do not point to any real function and never get called. Actually
10they point to some structure, which in turn contains pointer to the real block
11invoke function.
12This patch changes represntation of OpenCL blocks in LLVM IR from function
13pointers to pointers to `%struct.__block_literal_generic`.
14Such representation allows to avoid unnecessary bitcasts and simplifies
15further processing (e.g. translation to SPIR-V ) of the module for targets
16which do not support function pointers.
17
18Patch by: Alexey Sotkin.
19
20Reviewers: Anastasia, yaxunl, svenvh
21
22Reviewed By: Anastasia
23
24Subscribers: alexbatashev, cfe-commits
25
26Tags: #clang
27
28Differential Revision: https://reviews.llvm.org/D58277
29
30git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354337 91177308-0d34-0410-b5e6-96231b3b80d8
31
32Upstream-Status: Backport
33[https://github.com/llvm-mirror/clang/commit/283f308bdb5893bab1f36791711346e746045f94]
34Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
35---
36 lib/CodeGen/CodeGenTypes.cpp | 4 +++-
37 test/CodeGenOpenCL/blocks.cl | 18 ++++++++----------
38 test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 18 +++++++++---------
39 3 files changed, 20 insertions(+), 20 deletions(-)
40
41diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
42index 2acf1ac..93b3ebf 100644
43--- a/lib/CodeGen/CodeGenTypes.cpp
44+++ b/lib/CodeGen/CodeGenTypes.cpp
45@@ -637,7 +637,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
46
47 case Type::BlockPointer: {
48 const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType();
49- llvm::Type *PointeeType = ConvertTypeForMem(FTy);
50+ llvm::Type *PointeeType = CGM.getLangOpts().OpenCL
51+ ? CGM.getGenericBlockLiteralType()
52+ : ConvertTypeForMem(FTy);
53 unsigned AS = Context.getTargetAddressSpace(FTy);
54 ResultType = llvm::PointerType::get(PointeeType, AS);
55 break;
56diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
57index 675240c..19aacc3 100644
58--- a/test/CodeGenOpenCL/blocks.cl
59+++ b/test/CodeGenOpenCL/blocks.cl
60@@ -35,11 +35,10 @@ void foo(){
61 // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
62 // SPIR: %[[i_value:.*]] = load i32, i32* %i
63 // SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
64- // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
65- // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
66- // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
67- // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
68- // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
69+ // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic*
70+ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
71+ // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
72+ // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
73 // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
74 // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
75 // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
76@@ -50,11 +49,10 @@ void foo(){
77 // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
78 // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
79 // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
80- // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
81- // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
82- // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
83- // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
84- // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic*
85+ // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)*
86+ // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
87+ // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
88+ // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
89 // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
90 // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
91 // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
92diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
93index 4732194..8445016 100644
94--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
95+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
96@@ -11,7 +11,7 @@ typedef struct {int a;} ndrange_t;
97
98 // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
99 // COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
100-// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
101+// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*)
102
103 // For anonymous blocks without captures, emit block literals as global variable.
104 // COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
105@@ -77,9 +77,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
106 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
107 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
108 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
109- // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
110- // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
111- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
112+ // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic*
113+ // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic*
114+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
115 // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
116 // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
117 // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
118@@ -95,8 +95,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
119 // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
120 // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
121 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
122- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
123- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
124+ // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic*
125+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
126 // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
127 // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
128 // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
129@@ -300,13 +300,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
130 // Emits global block literal [[BLG8]] and invoke function [[INVG8]].
131 // The full type of these expressions are long (and repeated elsewhere), so we
132 // capture it as part of the regex for convenience and clarity.
133- // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
134+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A
135 void (^const block_A)(void) = ^{
136 return;
137 };
138
139 // Emits global block literal [[BLG9]] and invoke function [[INVG9]].
140- // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
141+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B
142 void (^const block_B)(local void *) = ^(local void *a) {
143 return;
144 };
145@@ -346,7 +346,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
146 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
147 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
148 // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
149- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
150+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)*
151 // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
152 // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
153 // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
154--
1551.8.3.1
156
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
deleted file mode 100644
index 4a528674..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
+++ /dev/null
@@ -1,1119 +0,0 @@
1From 9ce0fe02fd6cda5fb29fbb0d5037a1798a810b8a Mon Sep 17 00:00:00 2001
2From: Alexey Sotkin <alexey.sotkin@intel.com>
3Date: Thu, 21 Feb 2019 17:14:36 +0300
4Subject: [PATCH 1/3] Update LowerOpenCL pass to handle new blocks
5 represntation in LLVM IR
6
7Upstream-Status: Backport
8[https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/bd6ddfaf7232cd81c7f2fe9877e66f286731bd8e]
9Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
10---
11 lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 413 ++++++++----------------------
12 test/global_block.ll | 71 ++---
13 test/literal-struct.ll | 31 ++-
14 test/transcoding/block_w_struct_return.ll | 47 ++--
15 test/transcoding/enqueue_kernel.ll | 237 ++++++++++-------
16 5 files changed, 317 insertions(+), 482 deletions(-)
17
18diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
19index 50e1838..b42a4ec 100644
20--- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
21+++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
22@@ -1,303 +1,110 @@
23-//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===//
24-//
25-// The LLVM/SPIRV Translator
26-//
27-// This file is distributed under the University of Illinois Open Source
28-// License. See LICENSE.TXT for details.
29-//
30-// Copyright (c) 2018 Intel Corporation. All rights reserved.
31-//
32-// Permission is hereby granted, free of charge, to any person obtaining a
33-// copy of this software and associated documentation files (the "Software"),
34-// to deal with the Software without restriction, including without limitation
35-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
36-// and/or sell copies of the Software, and to permit persons to whom the
37-// Software is furnished to do so, subject to the following conditions:
38-//
39-// Redistributions of source code must retain the above copyright notice,
40-// this list of conditions and the following disclaimers.
41-// Redistributions in binary form must reproduce the above copyright notice,
42-// this list of conditions and the following disclaimers in the documentation
43-// and/or other materials provided with the distribution.
44-// Neither the names of Intel Corporation, nor the names of its
45-// contributors may be used to endorse or promote products derived from this
46-// Software without specific prior written permission.
47-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
48-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
49-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
50-// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
51-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
52-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
53-// THE SOFTWARE.
54-//
55-//===----------------------------------------------------------------------===//
56-//
57-// SPIR-V specification doesn't allow function pointers, so SPIR-V translator
58-// is designed to fail if a value with function type (except calls) is occured.
59-// Currently there is only two cases, when function pointers are generating in
60-// LLVM IR in OpenCL - block calls and device side enqueue built-in calls.
61-//
62-// In both cases values with function type used as intermediate representation
63-// for block literal structure.
64-//
65-// This pass is designed to find such cases and simplify them to avoid any
66-// function pointer types occurrences in LLVM IR in 4 steps.
67-//
68-// 1. Find all function pointer allocas, like
69-// %block = alloca void () *
70-//
71-// Then find a single store to that alloca:
72-// %blockLit = alloca <{ i32, i32, ...}>, align 4
73-// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()*
74-// > store void ()* %0, void ()** %block, align 4
75-//
76-// And replace the alloca users by new instructions which used stored value
77-// %blockLit itself instead of function pointer alloca %block.
78-//
79-// 2. Find consecutive casts from block literal type to i8 addrspace(4)*
80-// used function pointers as an intermediate type:
81-// %0 = bitcast <{ i32, i32 }> %block to void() *
82-// %1 = addrspacecast void() * %0 to i8 addrspace(4)*
83-// And simplify them:
84-// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)*
85-//
86-// 3. Find all unused instructions with function pointer type occured after
87-// pp.1-2 and remove them.
88-//
89-// 4. Find unused globals with function pointer type, like
90-// @block = constant void ()*
91-// bitcast ({ i32, i32 }* @__block_literal_global to void ()*
92-//
93-// And remove them.
94-//
95-//===----------------------------------------------------------------------===//
96-#define DEBUG_TYPE "spv-lower-ocl-blocks"
97-
98-#include "OCLUtil.h"
99-#include "SPIRVInternal.h"
100-
101-#include "llvm/ADT/SetVector.h"
102-#include "llvm/Analysis/ValueTracking.h"
103-#include "llvm/IR/GlobalVariable.h"
104-#include "llvm/IR/InstIterator.h"
105-#include "llvm/IR/Module.h"
106-#include "llvm/Pass.h"
107-#include "llvm/PassSupport.h"
108-#include "llvm/Support/Casting.h"
109-
110-using namespace llvm;
111-
112-namespace {
113-
114-static void
115-removeUnusedFunctionPtrInst(Instruction *I,
116- SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
117- for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) {
118- Instruction *OpI = dyn_cast<Instruction>(I->getOperand(OpIdx));
119- I->setOperand(OpIdx, nullptr);
120- if (OpI && OpI != I && OpI->user_empty())
121- FuncPtrInsts.insert(OpI);
122- }
123- I->eraseFromParent();
124-}
125-
126-static bool isFuncPtrAlloca(const AllocaInst *AI) {
127- auto *ET = dyn_cast<PointerType>(AI->getAllocatedType());
128- return ET && ET->getElementType()->isFunctionTy();
129-}
130-
131-static bool hasFuncPtrType(const Value *V) {
132- auto *PT = dyn_cast<PointerType>(V->getType());
133- return PT && PT->getElementType()->isFunctionTy();
134-}
135-
136-static bool isFuncPtrInst(const Instruction *I) {
137- if (auto *AI = dyn_cast<AllocaInst>(I))
138- return isFuncPtrAlloca(AI);
139-
140- for (auto &Op : I->operands()) {
141- if (auto *AI = dyn_cast<AllocaInst>(Op))
142- return isFuncPtrAlloca(AI);
143-
144- auto *OpI = dyn_cast<Instruction>(&Op);
145- if (OpI && OpI != I && hasFuncPtrType(OpI))
146- return true;
147- }
148- return false;
149-}
150-
151-static StoreInst *findSingleStore(AllocaInst *AI) {
152- StoreInst *Store = nullptr;
153- for (auto *U : AI->users()) {
154- if (!isa<StoreInst>(U))
155- continue; // not a store
156- if (Store)
157- return nullptr; // there are more than one stores
158- Store = dyn_cast<StoreInst>(U);
159- }
160- return Store;
161-}
162-
163-static void fixFunctionPtrAllocaUsers(AllocaInst *AI) {
164- // Find and remove a single store to alloca
165- auto *SingleStore = findSingleStore(AI);
166- assert(SingleStore && "More than one store to the function pointer alloca");
167- auto *StoredVal = SingleStore->getValueOperand();
168- SingleStore->eraseFromParent();
169-
170- // Find loads from the alloca and replace thier users
171- for (auto *U : AI->users()) {
172- auto *LI = dyn_cast<LoadInst>(U);
173- if (!LI)
174- continue;
175-
176- for (auto *U : LI->users()) {
177- auto *UInst = cast<Instruction>(U);
178- auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast(
179- StoredVal, UInst->getType(), "", UInst);
180- UInst->replaceAllUsesWith(Cast);
181- }
182- }
183-}
184-
185-static int getBlockLiteralIdx(const Function &F) {
186- StringRef FName = F.getName();
187- if (isEnqueueKernelBI(FName))
188- return FName.contains("events") ? 7 : 4;
189- if (isKernelQueryBI(FName))
190- return FName.contains("for_ndrange") ? 2 : 1;
191- if (FName.startswith("__") && FName.contains("_block_invoke"))
192- return F.hasStructRetAttr() ? 1 : 0;
193-
194- return -1; // No block literal argument
195-}
196-
197-static bool hasBlockLiteralArg(const Function &F) {
198- return getBlockLiteralIdx(F) != -1;
199-}
200-
201-static bool simplifyFunctionPtrCasts(Function &F) {
202- bool Changed = false;
203- int BlockLiteralIdx = getBlockLiteralIdx(F);
204- for (auto *U : F.users()) {
205- auto *Call = dyn_cast<CallInst>(U);
206- if (!Call)
207- continue;
208- if (Call->getFunction()->getName() == F.getName().str() + "_kernel")
209- continue; // Skip block invoke function calls inside block invoke kernels
210-
211- const DataLayout &DL = F.getParent()->getDataLayout();
212- auto *BlockLiteral = Call->getOperand(BlockLiteralIdx);
213- auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL);
214- if (isa<GlobalVariable>(BlockLiteralVal))
215- continue; // nothing to do with globals
216-
217- auto *BlockLiteralAlloca = cast<AllocaInst>(BlockLiteralVal);
218- assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() &&
219- "Function type shouldn't be there");
220-
221- auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast(
222- BlockLiteralAlloca, BlockLiteral->getType(), "", Call);
223- BlockLiteral->replaceAllUsesWith(NewBlockLiteral);
224- Changed |= true;
225- }
226- return Changed;
227-}
228-
229-static void
230-findFunctionPtrAllocas(Module &M,
231- SmallVectorImpl<AllocaInst *> &FuncPtrAllocas) {
232- for (auto &F : M) {
233- if (F.isDeclaration())
234- continue;
235- for (auto &I : instructions(F)) {
236- auto *AI = dyn_cast<AllocaInst>(&I);
237- if (!AI || !isFuncPtrAlloca(AI))
238- continue;
239- FuncPtrAllocas.push_back(AI);
240- }
241- }
242-}
243-
244-static void
245-findUnusedFunctionPtrInsts(Module &M,
246- SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
247- for (auto &F : M) {
248- if (F.isDeclaration())
249- continue;
250- for (auto &I : instructions(F))
251- if (I.user_empty() && isFuncPtrInst(&I))
252- FuncPtrInsts.insert(&I);
253- }
254-}
255-
256-static void
257-findUnusedFunctionPtrGlbs(Module &M,
258- SmallVectorImpl<GlobalVariable *> &FuncPtrGlbs) {
259- for (auto &GV : M.globals()) {
260- if (!GV.user_empty())
261- continue;
262- auto *GVType = dyn_cast<PointerType>(GV.getType()->getElementType());
263- if (GVType && GVType->getElementType()->isFunctionTy())
264- FuncPtrGlbs.push_back(&GV);
265- }
266-}
267-
268-class SPIRVLowerOCLBlocks : public ModulePass {
269-
270-public:
271- SPIRVLowerOCLBlocks() : ModulePass(ID) {}
272-
273- bool runOnModule(Module &M) {
274- bool Changed = false;
275-
276- // 1. Find function pointer allocas and fix their users
277- SmallVector<AllocaInst *, 16> FuncPtrAllocas;
278- findFunctionPtrAllocas(M, FuncPtrAllocas);
279-
280- Changed |= !FuncPtrAllocas.empty();
281- for (auto *AI : FuncPtrAllocas)
282- fixFunctionPtrAllocaUsers(AI);
283-
284- // 2. Simplify consecutive casts which use function pointer types
285- for (auto &F : M)
286- if (hasBlockLiteralArg(F))
287- Changed |= simplifyFunctionPtrCasts(F);
288-
289- // 3. Cleanup unused instructions with function pointer type
290- // which are occured after pp. 1-2
291- SmallSetVector<Instruction *, 16> FuncPtrInsts;
292- findUnusedFunctionPtrInsts(M, FuncPtrInsts);
293-
294- Changed |= !FuncPtrInsts.empty();
295- while (!FuncPtrInsts.empty()) {
296- Instruction *I = FuncPtrInsts.pop_back_val();
297- removeUnusedFunctionPtrInst(I, FuncPtrInsts);
298- }
299-
300- // 4. Find and remove unused global variables with function pointer type
301- SmallVector<GlobalVariable *, 16> FuncPtrGlbs;
302- findUnusedFunctionPtrGlbs(M, FuncPtrGlbs);
303-
304- Changed |= !FuncPtrGlbs.empty();
305- for (auto *GV : FuncPtrGlbs)
306- GV->eraseFromParent();
307-
308- return Changed;
309- }
310-
311- static char ID;
312-}; // class SPIRVLowerOCLBlocks
313-
314-char SPIRVLowerOCLBlocks::ID = 0;
315-
316-} // namespace
317-
318-INITIALIZE_PASS(
319- SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks",
320- "Remove function pointers occured in case of using OpenCL blocks", false,
321- false)
322-
323-llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() {
324- return new SPIRVLowerOCLBlocks();
325-}
326+//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===//
327+//
328+// The LLVM/SPIRV Translator
329+//
330+// This file is distributed under the University of Illinois Open Source
331+// License. See LICENSE.TXT for details.
332+//
333+// Copyright (c) 2018 Intel Corporation. All rights reserved.
334+//
335+// Permission is hereby granted, free of charge, to any person obtaining a
336+// copy of this software and associated documentation files (the "Software"),
337+// to deal with the Software without restriction, including without limitation
338+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
339+// and/or sell copies of the Software, and to permit persons to whom the
340+// Software is furnished to do so, subject to the following conditions:
341+//
342+// Redistributions of source code must retain the above copyright notice,
343+// this list of conditions and the following disclaimers.
344+// Redistributions in binary form must reproduce the above copyright notice,
345+// this list of conditions and the following disclaimers in the documentation
346+// and/or other materials provided with the distribution.
347+// Neither the names of Intel Corporation, nor the names of its
348+// contributors may be used to endorse or promote products derived from this
349+// Software without specific prior written permission.
350+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
351+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
352+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
353+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
354+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
355+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
356+// THE SOFTWARE.
357+//
358+//===----------------------------------------------------------------------===//
359+//
360+// SPIR-V specification doesn't allow function pointers, so SPIR-V translator
361+// is designed to fail if a value with function type (except calls) is occured.
362+// Currently there is only two cases, when function pointers are generating in
363+// LLVM IR in OpenCL - block calls and device side enqueue built-in calls.
364+//
365+// In both cases values with function type used as intermediate representation
366+// for block literal structure.
367+//
368+// In LLVM IR produced by clang, blocks are represented with the following
369+// structure:
370+// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
371+// Pointers to block invoke functions are stored in the third field. Clang
372+// replaces inderect function calls in all cases except if block is passed as a
373+// function argument. Note that it is somewhat unclear if the OpenCL C spec
374+// should allow passing blocks as function argumernts. This pass is not supposed
375+// to work correctly with such functions.
376+// Clang though has to store function pointers to this structure. Purpose of
377+// this pass is to replace store of function pointers(not allowed in SPIR-V)
378+// with null pointers.
379+//
380+//===----------------------------------------------------------------------===//
381+#define DEBUG_TYPE "spv-lower-ocl-blocks"
382+
383+#include "SPIRVInternal.h"
384+
385+#include "llvm/IR/Module.h"
386+#include "llvm/Pass.h"
387+#include "llvm/Support/Regex.h"
388+
389+using namespace llvm;
390+
391+namespace {
392+
393+static bool isBlockInvoke(Function &F) {
394+ static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$");
395+ return BlockInvokeRegex.match(F.getName());
396+}
397+
398+class SPIRVLowerOCLBlocks : public ModulePass {
399+
400+public:
401+ SPIRVLowerOCLBlocks() : ModulePass(ID) {}
402+
403+ bool runOnModule(Module &M) {
404+ bool Changed = false;
405+ for (Function &F : M) {
406+ if (!isBlockInvoke(F))
407+ continue;
408+ for (User *U : F.users()) {
409+ if (!isa<Constant>(U))
410+ continue;
411+ Constant *Null = Constant::getNullValue(U->getType());
412+ if (U != Null) {
413+ U->replaceAllUsesWith(Null);
414+ Changed = true;
415+ }
416+ }
417+ }
418+ return Changed;
419+ }
420+
421+ static char ID;
422+};
423+
424+char SPIRVLowerOCLBlocks::ID = 0;
425+
426+} // namespace
427+
428+INITIALIZE_PASS(
429+ SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks",
430+ "Remove function pointers occured in case of using OpenCL blocks", false,
431+ false)
432+
433+llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() {
434+ return new SPIRVLowerOCLBlocks();
435+}
436diff --git a/test/global_block.ll b/test/global_block.ll
437index a9267d8..efb4cf3 100644
438--- a/test/global_block.ll
439+++ b/test/global_block.ll
440@@ -16,7 +16,7 @@
441 ; RUN: llvm-spirv %t.bc -o %t.spv
442 ; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM
443
444-target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
445+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
446 target triple = "spir-unknown-unknown"
447
448 ; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke"
449@@ -26,71 +26,56 @@ target triple = "spir-unknown-unknown"
450 ; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]]
451 ; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] [[int]]
452
453-;; This variable is not needed in SPIRV
454-; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1
455-; CHECK-LLVM-NOT: @block_kernel.b1
456-@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) addrspace(4)*), align 8
457+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
458
459-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
460+@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4
461+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4
462
463-; Function Attrs: convergent nounwind
464-define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
465+; Function Attrs: convergent noinline nounwind optnone
466+define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
467 entry:
468- %res.addr = alloca i32 addrspace(1)*, align 8
469- store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa !10
470-
471+ %res.addr = alloca i32 addrspace(1)*, align 4
472+ store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4
473 ; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} [[five]]
474 ; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 5)
475- %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
476-
477- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa !10
478- store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14
479+ %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
480+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4
481+ store i32 %call, i32 addrspace(1)* %0, align 4
482 ret void
483 }
484
485-; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]]
486+; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]]
487 ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}}
488 ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}}
489 ; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 %{{.*}})
490-; Function Attrs: convergent nounwind
491+; Function Attrs: convergent noinline nounwind optnone
492 define internal spir_func i32 @_block_invoke(i8 addrspace(4)* %.block_descriptor, i32 %i) #1 {
493 entry:
494- %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
495+ %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
496 %i.addr = alloca i32, align 4
497- store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8
498- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
499- store i32 %i, i32* %i.addr, align 4, !tbaa !14
500- %0 = load i32, i32* %i.addr, align 4, !tbaa !14
501+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
502+ store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
503+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
504+ store i32 %i, i32* %i.addr, align 4
505+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
506+ %0 = load i32, i32* %i.addr, align 4
507 %add = add nsw i32 %0, 1
508 ret i32 %add
509 }
510
511-attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
512-attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
513+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
514+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
515 attributes #2 = { convergent }
516
517 !llvm.module.flags = !{!0}
518-!opencl.enable.FP_CONTRACT = !{}
519 !opencl.ocl.version = !{!1}
520 !opencl.spir.version = !{!1}
521-!opencl.used.extensions = !{!2}
522-!opencl.used.optional.core.features = !{!2}
523-!opencl.compiler.options = !{!2}
524-!llvm.ident = !{!3}
525+!llvm.ident = !{!2}
526
527 !0 = !{i32 1, !"wchar_size", i32 4}
528 !1 = !{i32 2, i32 0}
529-!2 = !{}
530-!3 = !{!"clang version 7.0.0"}
531-!4 = !{i32 1}
532-!5 = !{!"none"}
533-!6 = !{!"int*"}
534-!7 = !{!""}
535-!8 = !{i1 false}
536-!9 = !{i32 0}
537-!10 = !{!11, !11, i64 0}
538-!11 = !{!"any pointer", !12, i64 0}
539-!12 = !{!"omnipotent char", !13, i64 0}
540-!13 = !{!"Simple C/C++ TBAA"}
541-!14 = !{!15, !15, i64 0}
542-!15 = !{!"int", !12, i64 0}
543+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
544+!3 = !{i32 1}
545+!4 = !{!"none"}
546+!5 = !{!"int*"}
547+!6 = !{!""}
548diff --git a/test/literal-struct.ll b/test/literal-struct.ll
549index c52170a..52a731a 100644
550--- a/test/literal-struct.ll
551+++ b/test/literal-struct.ll
552@@ -2,7 +2,7 @@
553 ; structs, i.e. structs whose type has no name. Typicaly clang generate such
554 ; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with
555 ; the following command:
556-; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll
557+; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll
558
559 ; literal-struct.cl:
560 ; void foo()
561@@ -14,25 +14,28 @@
562 ; RUN: llvm-as < %s | llvm-spirv -spirv-text -o %t
563 ; RUN: FileCheck < %t %s
564
565-; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0
566-; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}}
567+; CHECK: TypeInt [[Int:[0-9]+]] 32 0
568+; CHECK: TypeInt [[Int8:[0-9]+]] 8 0
569+; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]]
570+; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]]
571
572 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
573 target triple = "spir"
574
575-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
576+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
577+
578+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4
579 ; CHECK: ConstantComposite [[StructType]]
580
581-; This is artificial case is added to cover ConstantNull instrucitions with TypeStruct.
582-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } zeroinitializer, align 4
583+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4
584 ; CHECK: ConstantNull [[StructType]]
585
586 ; Function Attrs: convergent noinline nounwind optnone
587 define spir_func void @foo() #0 {
588 entry:
589- %myBlock = alloca void () addrspace(4)*, align 4
590- store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4
591- call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
592+ %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4
593+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4
594+ call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
595 ret void
596 }
597
598@@ -40,14 +43,14 @@ entry:
599 define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) #0 {
600 entry:
601 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
602- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
603+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
604 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
605- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
606- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
607+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
608+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
609 ret void
610 }
611
612-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
613+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
614 attributes #1 = { convergent }
615
616 !llvm.module.flags = !{!0}
617@@ -57,4 +60,4 @@ attributes #1 = { convergent }
618
619 !0 = !{i32 1, !"wchar_size", i32 4}
620 !1 = !{i32 2, i32 0}
621-!2 = !{!"clang version 8.0.0 "}
622+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
623diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll
624index 76e29f0..df89b13 100644
625--- a/test/transcoding/block_w_struct_return.ll
626+++ b/test/transcoding/block_w_struct_return.ll
627@@ -16,6 +16,8 @@
628 ; res[tid] = kernelBlock(aa).a - 6;
629 ; }
630
631+; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll
632+
633 ; RUN: llvm-as %s -o %t.bc
634 ; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt
635 ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV
636@@ -27,12 +29,14 @@
637 ; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke"
638
639 ; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32
640+; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8
641+; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]]
642 ; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]]
643 ; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]]
644
645 ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7
646 ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7
647-; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}}
648+; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}}
649 ; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] [[StructRet]] [[BlockLit]] [[StructArg]]
650
651 ; CHECK-LLVM: %[[StructA:.*]] = type { i32 }
652@@ -41,20 +45,21 @@
653 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
654 target triple = "spir64-unknown-unknown"
655
656+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
657 %struct.A = type { i32 }
658
659-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
660+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8
661
662 ; Function Attrs: convergent noinline nounwind optnone
663-define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible !8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location !7 {
664+define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
665 entry:
666 %res.addr = alloca i32 addrspace(1)*, align 8
667- %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8
668+ %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 8
669 %tid = alloca i64, align 8
670 %aa = alloca %struct.A, align 4
671 %tmp = alloca %struct.A, align 4
672 store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8
673- store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void (%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void (%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) addrspace(4)** %kernelBlock, align 8
674+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8
675 %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4
676 store i64 %call, i64* %tid, align 8
677 %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8
678@@ -63,7 +68,7 @@ entry:
679 store i32 -1, i32 addrspace(1)* %arrayidx, align 4
680 %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0
681 store i32 5, i32* %a, align 4
682- call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
683+ call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
684 %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0
685 %2 = load i32, i32* %a1, align 4
686 %sub = sub nsw i32 %2, 6
687@@ -78,10 +83,10 @@ entry:
688 define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval align 4 %a) #1 {
689 entry:
690 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
691- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8
692+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8
693 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8
694- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
695- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 8
696+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
697+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8
698 %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0
699 store i32 6, i32* %a1, align 4
700 %0 = bitcast %struct.A* %agg.result to i8*
701@@ -96,30 +101,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r
702 ; Function Attrs: convergent nounwind readnone
703 declare spir_func i64 @_Z13get_global_idj(i32) #3
704
705-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
706-attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
707+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
708+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
709 attributes #2 = { argmemonly nounwind }
710 attributes #3 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
711 attributes #4 = { convergent nounwind readnone }
712 attributes #5 = { convergent }
713
714 !llvm.module.flags = !{!0}
715-!opencl.enable.FP_CONTRACT = !{}
716 !opencl.ocl.version = !{!1}
717 !opencl.spir.version = !{!1}
718-!opencl.used.extensions = !{!2}
719-!opencl.used.optional.core.features = !{!2}
720-!opencl.compiler.options = !{!2}
721-!llvm.ident = !{!3}
722+!llvm.ident = !{!2}
723
724 !0 = !{i32 1, !"wchar_size", i32 4}
725 !1 = !{i32 2, i32 0}
726-!2 = !{}
727-!3 = !{!"clang version 7.0.0"}
728-!4 = !{i32 1}
729-!5 = !{!"none"}
730-!6 = !{!"int*"}
731-!7 = !{!""}
732-!8 = !{i1 false}
733-!9 = !{i32 0}
734-
735+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
736+!3 = !{i32 1}
737+!4 = !{!"none"}
738+!5 = !{!"int*"}
739+!6 = !{!""}
740diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll
741index 0d29c71..435871d 100644
742--- a/test/transcoding/enqueue_kernel.ll
743+++ b/test/transcoding/enqueue_kernel.ll
744@@ -51,11 +51,12 @@
745 ; ModuleID = 'enqueue_kernel.cl'
746 source_filename = "enqueue_kernel.cl"
747 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
748-target triple = "spir-unknown-unknown"
749+target triple = "spir"
750
751 %opencl.queue_t = type opaque
752 %struct.ndrange_t = type { i32 }
753 %opencl.clk_event_t = type opaque
754+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
755
756 ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel"
757 ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel"
758@@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown"
759
760 ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32
761 ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8
762-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
763 ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0
764-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17
765+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21
766 ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2
767-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20
768-; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
769+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
770+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24
771
772 ; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}}
773+; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
774+; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
775 ; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]]
776 ; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]]
777-; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
778 ; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]]
779 ; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
780 ; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
781 ; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
782 ; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]]
783
784-; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32 }
785-; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }>
786-; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
787-; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32 }>
788+; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32, i8 addrspace(4)* }
789+; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>
790+; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
791+; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)* }>
792
793-; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
794-; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
795+; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4
796+; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4
797
798-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
799-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
800+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4
801+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4
802
803 ; Function Attrs: convergent noinline nounwind optnone
804-define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
805+define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
806 entry:
807+ %a.addr = alloca i32 addrspace(1)*, align 4
808+ %b.addr = alloca i32 addrspace(1)*, align 4
809+ %i.addr = alloca i32, align 4
810+ %c0.addr = alloca i8, align 1
811 %default_queue = alloca %opencl.queue_t*, align 4
812 %flags = alloca i32, align 4
813 %ndrange = alloca %struct.ndrange_t, align 4
814 %clk_event = alloca %opencl.clk_event_t*, align 4
815 %event_wait_list = alloca %opencl.clk_event_t*, align 4
816 %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4
817- %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4
818- %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
819+ %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4
820+ %tmp = alloca %struct.ndrange_t, align 4
821+ %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
822+ %tmp4 = alloca %struct.ndrange_t, align 4
823 %c = alloca i8, align 1
824+ %tmp11 = alloca %struct.ndrange_t, align 4
825+ %block_sizes = alloca [1 x i32], align 4
826+ %tmp12 = alloca %struct.ndrange_t, align 4
827+ %block_sizes13 = alloca [3 x i32], align 4
828+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
829+ store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4
830+ store i32 %i, i32* %i.addr, align 4
831+ store i8 %c0, i8* %c0.addr, align 1
832 store i32 0, i32* %flags, align 4
833 %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
834 %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4
835 store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4
836 %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
837 %2 = load i32, i32* %flags, align 4
838- %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
839- store i32 17, i32* %block.size, align 4
840- %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
841+ %3 = bitcast %struct.ndrange_t* %tmp to i8*
842+ %4 = bitcast %struct.ndrange_t* %ndrange to i8*
843+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false)
844+ %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
845+ store i32 21, i32* %block.size, align 4
846+ %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
847 store i32 4, i32* %block.align, align 4
848- %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
849- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4
850- %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
851- store i32 %i, i32* %block.captured1, align 4
852- %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
853- store i8 %c0, i8* %block.captured2, align 4
854- %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()*
855- %4 = addrspacecast void ()* %3 to i8 addrspace(4)*
856+ %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
857+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4
858+ %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
859+ %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
860+ store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4
861+ %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
862+ %6 = load i32, i32* %i.addr, align 4
863+ store i32 %6, i32* %block.captured1, align 4
864+ %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5
865+ %7 = load i8, i8* %c0.addr, align 1
866+ store i8 %7, i8* %block.captured2, align 4
867+ %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic*
868+ %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)*
869
870 ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]]
871 ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}}
872 ; [[ConstInt0]] [[EventNull]] [[EventNull]]
873 ; [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]]
874
875-; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 addrspace(4)*
876+; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic*
877+; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)*
878 ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)*
879-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]])
880-
881- %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4)
882- %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
883- %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
884- %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
885- store i32 20, i32* %block.size5, align 4
886- %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
887+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]])
888+
889+ %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
890+ %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
891+ %12 = load i32, i32* %flags, align 4
892+ %13 = bitcast %struct.ndrange_t* %tmp4 to i8*
893+ %14 = bitcast %struct.ndrange_t* %ndrange to i8*
894+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false)
895+ %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
896+ %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
897+ %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
898+ store i32 24, i32* %block.size5, align 4
899+ %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
900 store i32 4, i32* %block.align6, align 4
901- %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
902- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4
903- %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
904- store i32 %i, i32* %block.captured8, align 4
905- %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
906- store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4
907- %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to void ()*
908- %9 = addrspacecast void ()* %8 to i8 addrspace(4)*
909+ %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
910+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4
911+ %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
912+ %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
913+ store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4
914+ %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
915+ %18 = load i32, i32* %i.addr, align 4
916+ store i32 %18, i32* %block.captured9, align 4
917+ %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5
918+ %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4
919+ store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4
920+ %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic*
921+ %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)*
922+
923
924 ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]]
925 ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]]
926@@ -158,16 +193,24 @@ entry:
927 ; [[ConstInt2]] [[Event1]] [[Event2]]
928 ; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]]
929
930-; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 addrspace(4)*
931+; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to %struct.__opencl_block_literal_generic*
932+; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4)
933 ; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)*
934-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]])
935-
936- %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
937- %11 = alloca [1 x i32]
938- %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0
939- %13 = load i8, i8* %c, align 1
940- %14 = zext i8 %13 to i32
941- store i32 %14, i32* %12, align 4
942+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]])
943+
944+ %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21)
945+ %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
946+ %24 = load i32, i32* %flags, align 4
947+ %25 = bitcast %struct.ndrange_t* %tmp11 to i8*
948+ %26 = bitcast %struct.ndrange_t* %ndrange to i8*
949+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false)
950+ %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
951+ %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)*
952+ %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
953+ %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0
954+ %30 = load i8, i8* %c, align 1
955+ %31 = zext i8 %30 to i32
956+ store i32 %31, i32* %29, align 4
957
958 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]]
959 ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]]
960@@ -182,14 +225,18 @@ entry:
961 ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)*
962 ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}})
963
964- %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %12)
965- %16 = alloca [3 x i32]
966- %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0
967- store i32 1, i32* %17, align 4
968- %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1
969- store i32 2, i32* %18, align 4
970- %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2
971- store i32 4, i32* %19, align 4
972+ %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29)
973+ %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
974+ %34 = load i32, i32* %flags, align 4
975+ %35 = bitcast %struct.ndrange_t* %tmp12 to i8*
976+ %36 = bitcast %struct.ndrange_t* %ndrange to i8*
977+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false)
978+ %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0
979+ store i32 1, i32* %37, align 4
980+ %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1
981+ store i32 2, i32* %38, align 4
982+ %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2
983+ store i32 4, i32* %39, align 4
984
985 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]]
986 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]]
987@@ -206,24 +253,27 @@ entry:
988 ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)*
989 ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}})
990
991- %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17)
992+ %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37)
993 ret void
994 }
995
996+; Function Attrs: argmemonly nounwind
997+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1
998+
999 ; Function Attrs: convergent noinline nounwind optnone
1000 define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #2 {
1001 entry:
1002 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
1003- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
1004+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
1005 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1006- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
1007- store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
1008- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
1009+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
1010+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
1011+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5
1012 %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4
1013 %conv = sext i8 %0 to i32
1014- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 2
1015+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
1016 %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4
1017- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
1018+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
1019 %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4
1020 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2
1021 store i32 %conv, i32 addrspace(1)* %arrayidx, align 4
1022@@ -243,19 +293,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i
1023 define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 {
1024 entry:
1025 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
1026- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
1027+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
1028 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1029- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
1030- store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
1031- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
1032+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
1033+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
1034+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5
1035 %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4
1036- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
1037+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
1038 %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4
1039 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1
1040 %2 = load i32, i32 addrspace(1)* %arrayidx, align 4
1041- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2
1042+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
1043 %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4
1044- %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
1045+ %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
1046 %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4
1047 %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4
1048 store i32 %2, i32 addrspace(1)* %arrayidx4, align 4
1049@@ -276,11 +326,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac
1050 entry:
1051 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
1052 %p.addr = alloca i8 addrspace(3)*, align 4
1053- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
1054+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
1055 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1056- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
1057+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
1058 store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4
1059- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
1060+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
1061 ret void
1062 }
1063
1064@@ -300,13 +350,13 @@ entry:
1065 %p1.addr = alloca i8 addrspace(3)*, align 4
1066 %p2.addr = alloca i8 addrspace(3)*, align 4
1067 %p3.addr = alloca i8 addrspace(3)*, align 4
1068- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
1069+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
1070 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1071- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
1072+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
1073 store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4
1074 store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4
1075 store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4
1076- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
1077+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
1078 ret void
1079 }
1080
1081@@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*,
1082 ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*)
1083 ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)
1084
1085-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
1086+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
1087 attributes #1 = { argmemonly nounwind }
1088-attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
1089+attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
1090 attributes #3 = { nounwind }
1091
1092 !llvm.module.flags = !{!0}
1093-!opencl.enable.FP_CONTRACT = !{}
1094 !opencl.ocl.version = !{!1}
1095 !opencl.spir.version = !{!1}
1096-!opencl.used.extensions = !{!2}
1097-!opencl.used.optional.core.features = !{!2}
1098-!opencl.compiler.options = !{!2}
1099-!llvm.ident = !{!3}
1100+!llvm.ident = !{!2}
1101
1102 !0 = !{i32 1, !"wchar_size", i32 4}
1103 !1 = !{i32 2, i32 0}
1104-!2 = !{}
1105-!3 = !{!"clang version 7.0.0"}
1106-!4 = !{i32 1, i32 1, i32 0, i32 0}
1107-!5 = !{!"none", !"none", !"none", !"none"}
1108-!6 = !{!"int*", !"int*", !"int", !"char"}
1109-!7 = !{!"", !"", !"", !""}
1110-!8 = !{i1 false, i1 false, i1 false, i1 false}
1111-!9 = !{i32 0, i32 0, i32 0, i32 0}
1112+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
1113+!3 = !{i32 1, i32 1, i32 0, i32 0}
1114+!4 = !{!"none", !"none", !"none", !"none"}
1115+!5 = !{!"int*", !"int*", !"int", !"char"}
1116+!6 = !{!"", !"", !"", !""}
1117--
11181.8.3.1
1119
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch
deleted file mode 100644
index 9d25bbad..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch
+++ /dev/null
@@ -1,66 +0,0 @@
1From 7bbd0058362ac3bb5edd7a82d43e1785810776b3 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Fri, 29 Mar 2019 08:56:53 +0800
4Subject: [PATCH] dont export targets for binaries
5
6The projects using LLVM cmake modules look for target binaries in
7sysroot as a result which isn't desirable in this case and isn't needed
8either.
9
10Upstream-Status: Inappropriate [cross-compile specific]
11
12Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
13---
14 llvm/cmake/modules/AddLLVM.cmake | 9 ---------
15 llvm/cmake/modules/TableGen.cmake | 6 ------
16 2 files changed, 15 deletions(-)
17
18diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
19index 0df6845..b79f4fa 100644
20--- a/llvm/cmake/modules/AddLLVM.cmake
21+++ b/llvm/cmake/modules/AddLLVM.cmake
22@@ -866,12 +866,6 @@ macro(add_llvm_tool name)
23
24 if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
25 if( LLVM_BUILD_TOOLS )
26- if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
27- NOT LLVM_DISTRIBUTION_COMPONENTS)
28- set(export_to_llvmexports EXPORT LLVMExports)
29- set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True)
30- endif()
31-
32 install(TARGETS ${name}
33 ${export_to_llvmexports}
34 RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR}
35@@ -884,9 +878,6 @@ macro(add_llvm_tool name)
36 endif()
37 endif()
38 endif()
39- if( LLVM_BUILD_TOOLS )
40- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name})
41- endif()
42 set_target_properties(${name} PROPERTIES FOLDER "Tools")
43 endmacro(add_llvm_tool name)
44
45diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
46index 3c84ae7..141219f 100644
47--- a/llvm/cmake/modules/TableGen.cmake
48+++ b/llvm/cmake/modules/TableGen.cmake
49@@ -164,14 +164,8 @@ macro(add_tablegen target project)
50 endif()
51
52 if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
53- if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
54- NOT LLVM_DISTRIBUTION_COMPONENTS)
55- set(export_to_llvmexports EXPORT LLVMExports)
56- endif()
57-
58 install(TARGETS ${target}
59 ${export_to_llvmexports}
60 RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR})
61 endif()
62- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${target})
63 endmacro()
64--
652.7.4
66
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch
deleted file mode 100644
index 0dfc537b..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch
+++ /dev/null
@@ -1,59 +0,0 @@
1From 6c33fb58869ffb17106047c45ab8d3856966eaf7 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Tue, 26 Mar 2019 14:11:29 +0800
4Subject: [PATCH] point to correct clang project and tblgen
5
6Point to correct path for clang project as per the way we unpack. Also
7let llvm-tblgen path be passed from recipe itself.
8
9Also since we're going to do the patching ourselves, no need to look for
10git through cmake.
11
12Upstream-Status: Inappropriate [OE specific]
13---
14 CMakeLists.txt | 8 ++++----
15 1 file changed, 4 insertions(+), 4 deletions(-)
16
17diff --git a/CMakeLists.txt b/CMakeLists.txt
18index 174133b..c769f08 100644
19--- a/CMakeLists.txt
20+++ b/CMakeLists.txt
21@@ -53,7 +53,7 @@ endif(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
22 include(AddLLVM)
23 include(TableGen)
24
25-find_package(Git REQUIRED)
26+#find_package(Git REQUIRED)
27
28 if (NOT WIN32)
29 add_subdirectory( linux_linker )
30@@ -80,7 +80,7 @@ set(TARGET_NAME ${COMMON_CLANG_LIBRARY_NAME}${BUILD_PLATFORM} )
31
32 if(NOT USE_PREBUILT_LLVM)
33 set(TARGET_BRANCH "ocl-open-80")
34- set(CLANG_SOURCE_DIR ${LLVM_SOURCE_DIR}/tools/clang)
35+ set(CLANG_SOURCE_DIR ${LLVM_SOURCE_DIR}/../clang)
36 set(CLANG_BASE_REVISION a03da8be08a208122e292016cb6cea1f30229677)
37
38 set(SPIRV_SOURCE_DIR ${LLVM_SOURCE_DIR}/projects/llvm-spirv)
39@@ -102,7 +102,7 @@ endif(NOT USE_PREBUILT_LLVM)
40 set (COMPILE_OPTIONS_TD opencl_clang_options.td)
41 set (COMPILE_OPTIONS_INC opencl_clang_options.inc)
42
43-set(LLVM_TABLEGEN_EXE "llvm-tblgen")
44+#set(LLVM_TABLEGEN_EXE "llvm-tblgen")
45 set(LLVM_TARGET_DEFINITIONS ${COMPILE_OPTIONS_TD})
46 if(USE_PREBUILT_LLVM)
47 set(TABLEGEN_ADDITIONAL -I ${LLVM_INCLUDE_DIRS})
48@@ -153,7 +153,7 @@ endif()
49
50 if(NOT USE_PREBUILT_LLVM)
51 set(CLANG_BINARY_DIR ${LLVM_BINARY_DIR}/tools/clang/)
52- set(CLANG_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}/tools/clang/)
53+ set(CLANG_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}/../clang/)
54 include_directories(
55 ${CLANG_BINARY_DIR}/include # for tablegened includes
56 ${CLANG_SOURCE_DIR}/include # for basic headers
57--
582.19.1
59
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch
deleted file mode 100644
index 2e935a13..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch
+++ /dev/null
@@ -1,294 +0,0 @@
1From c94ec28600255098ffb9d73d1b386a7c8a535590 Mon Sep 17 00:00:00 2001
2From: Andrew Savonichev <andrew.savonichev@intel.com>
3Date: Thu, 21 Feb 2019 11:02:10 +0000
4Subject: [PATCH 2/2] [OpenCL] Simplify LLVM IR generated for OpenCL blocks
5
6Summary:
7Emit direct call of block invoke functions when possible, i.e. in case the
8block is not passed as a function argument.
9Also doing some refactoring of `CodeGenFunction::EmitBlockCallExpr()`
10
11Reviewers: Anastasia, yaxunl, svenvh
12
13Reviewed By: Anastasia
14
15Subscribers: cfe-commits
16
17Tags: #clang
18
19Differential Revision: https://reviews.llvm.org/D58388
20
21git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354568 91177308-0d34-0410-b5e6-96231b3b80d8
22
23Upstream-Status: Backport
24[https://github.com/llvm-mirror/clang/commit/eae71f8d05ce550c4e2595c9b7082cc2c7882c58]
25Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
26---
27 lib/CodeGen/CGBlocks.cpp | 77 +++++++++++++-------------
28 lib/CodeGen/CGOpenCLRuntime.cpp | 30 +++++++---
29 lib/CodeGen/CGOpenCLRuntime.h | 4 ++
30 test/CodeGenOpenCL/blocks.cl | 10 +---
31 test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 34 +++++++++---
32 5 files changed, 91 insertions(+), 64 deletions(-)
33
34diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
35index fa3c3ee..10a0238 100644
36--- a/lib/CodeGen/CGBlocks.cpp
37+++ b/lib/CodeGen/CGBlocks.cpp
38@@ -1261,52 +1261,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
39 ReturnValueSlot ReturnValue) {
40 const BlockPointerType *BPT =
41 E->getCallee()->getType()->getAs<BlockPointerType>();
42-
43 llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
44-
45- // Get a pointer to the generic block literal.
46- // For OpenCL we generate generic AS void ptr to be able to reuse the same
47- // block definition for blocks with captures generated as private AS local
48- // variables and without captures generated as global AS program scope
49- // variables.
50- unsigned AddrSpace = 0;
51- if (getLangOpts().OpenCL)
52- AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
53-
54- llvm::Type *BlockLiteralTy =
55- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
56-
57- // Bitcast the callee to a block literal.
58- BlockPtr =
59- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
60-
61- // Get the function pointer from the literal.
62- llvm::Value *FuncPtr =
63- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
64- CGM.getLangOpts().OpenCL ? 2 : 3);
65-
66- // Add the block literal.
67+ llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType();
68+ llvm::Value *Func = nullptr;
69+ QualType FnType = BPT->getPointeeType();
70+ ASTContext &Ctx = getContext();
71 CallArgList Args;
72
73- QualType VoidPtrQualTy = getContext().VoidPtrTy;
74- llvm::Type *GenericVoidPtrTy = VoidPtrTy;
75 if (getLangOpts().OpenCL) {
76- GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType();
77- VoidPtrQualTy =
78- getContext().getPointerType(getContext().getAddrSpaceQualType(
79- getContext().VoidTy, LangAS::opencl_generic));
80- }
81-
82- BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy);
83- Args.add(RValue::get(BlockPtr), VoidPtrQualTy);
84-
85- QualType FnType = BPT->getPointeeType();
86+ // For OpenCL, BlockPtr is already casted to generic block literal.
87+
88+ // First argument of a block call is a generic block literal casted to
89+ // generic void pointer, i.e. i8 addrspace(4)*
90+ llvm::Value *BlockDescriptor = Builder.CreatePointerCast(
91+ BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType());
92+ QualType VoidPtrQualTy = Ctx.getPointerType(
93+ Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic));
94+ Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy);
95+ // And the rest of the arguments.
96+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
97+
98+ // We *can* call the block directly unless it is a function argument.
99+ if (!isa<ParmVarDecl>(E->getCalleeDecl()))
100+ Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
101+ else {
102+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2);
103+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
104+ }
105+ } else {
106+ // Bitcast the block literal to a generic block literal.
107+ BlockPtr = Builder.CreatePointerCast(
108+ BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal");
109+ // Get pointer to the block invoke function
110+ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3);
111
112- // And the rest of the arguments.
113- EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
114+ // First argument is a block literal casted to a void pointer
115+ BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy);
116+ Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy);
117+ // And the rest of the arguments.
118+ EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
119
120- // Load the function.
121- llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
122+ // Load the function.
123+ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
124+ }
125
126 const FunctionType *FuncTy = FnType->castAs<FunctionType>();
127 const CGFunctionInfo &FnInfo =
128diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
129index 7f6f595..75003e5 100644
130--- a/lib/CodeGen/CGOpenCLRuntime.cpp
131+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
132@@ -123,6 +123,23 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
133 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
134 }
135
136+// Get the block literal from an expression derived from the block expression.
137+// OpenCL v2.0 s6.12.5:
138+// Block variable declarations are implicitly qualified with const. Therefore
139+// all block variables must be initialized at declaration time and may not be
140+// reassigned.
141+static const BlockExpr *getBlockExpr(const Expr *E) {
142+ const Expr *Prev = nullptr; // to make sure we do not stuck in infinite loop.
143+ while(!isa<BlockExpr>(E) && E != Prev) {
144+ Prev = E;
145+ E = E->IgnoreCasts();
146+ if (auto DR = dyn_cast<DeclRefExpr>(E)) {
147+ E = cast<VarDecl>(DR->getDecl())->getInit();
148+ }
149+ }
150+ return cast<BlockExpr>(E);
151+}
152+
153 /// Record emitted llvm invoke function and llvm block literal for the
154 /// corresponding block expression.
155 void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
156@@ -137,20 +154,17 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
157 EnqueuedBlockMap[E].Kernel = nullptr;
158 }
159
160+llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
161+ return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
162+}
163+
164 CGOpenCLRuntime::EnqueuedBlockInfo
165 CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
166 CGF.EmitScalarExpr(E);
167
168 // The block literal may be assigned to a const variable. Chasing down
169 // to get the block literal.
170- if (auto DR = dyn_cast<DeclRefExpr>(E)) {
171- E = cast<VarDecl>(DR->getDecl())->getInit();
172- }
173- E = E->IgnoreImplicit();
174- if (auto Cast = dyn_cast<CastExpr>(E)) {
175- E = Cast->getSubExpr();
176- }
177- auto *Block = cast<BlockExpr>(E);
178+ const BlockExpr *Block = getBlockExpr(E);
179
180 assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
181 "Block expression not emitted");
182diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h
183index 750721f..4effc7e 100644
184--- a/lib/CodeGen/CGOpenCLRuntime.h
185+++ b/lib/CodeGen/CGOpenCLRuntime.h
186@@ -92,6 +92,10 @@ public:
187 /// \param Block block literal emitted for the block expression.
188 void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
189 llvm::Value *Block);
190+
191+ /// \return LLVM block invoke function emitted for an expression derived from
192+ /// the block expression.
193+ llvm::Function *getInvokeFunction(const Expr *E);
194 };
195
196 }
197diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
198index 19aacc3..ab5a2c6 100644
199--- a/test/CodeGenOpenCL/blocks.cl
200+++ b/test/CodeGenOpenCL/blocks.cl
201@@ -39,11 +39,8 @@ void foo(){
202 // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
203 // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]],
204 // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]]
205- // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
206 // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
207- // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
208- // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
209- // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
210+ // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
211 // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2
212 // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]]
213 // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3
214@@ -53,11 +50,8 @@ void foo(){
215 // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic*
216 // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]],
217 // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]]
218- // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2
219 // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8*
220- // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]]
221- // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)*
222- // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]])
223+ // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
224
225 int (^ block_B)(void) = ^{
226 return i;
227diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
228index 8445016..1566912 100644
229--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
230+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
231@@ -312,9 +312,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
232 };
233
234 // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
235- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
236- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
237- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
238+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
239 block_A();
240
241 // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
242@@ -333,15 +331,35 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
243 unsigned size = get_kernel_work_group_size(block_A);
244
245 // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
246- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
247- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
248- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
249+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
250 block_A();
251
252+ // Make sure that block invoke function is resolved correctly after sequence of assignements.
253+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
254+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
255+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
256+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
257+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1,
258+ bl_t b1 = block_G;
259+ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)*
260+ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)*
261+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*)
262+ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*),
263+ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2,
264+ bl_t b2 = b1;
265+ // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)*
266+ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*)
267+ // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null)
268+ b2(0);
269+ // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
270+ // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
271+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
272+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
273+ size = get_kernel_preferred_work_group_size_multiple(b2);
274+
275 void (^block_C)(void) = ^{
276 callee(i, a);
277 };
278-
279 // Emits block literal on stack and block kernel [[INVLK3]].
280 // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
281 // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
282@@ -404,8 +422,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
283 // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
284 // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
285 // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
286+// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
287 // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
288 // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
289-// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
290 // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
291 // COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}})
292--
2931.8.3.1
294
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch
deleted file mode 100644
index 510c7c6e..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch
+++ /dev/null
@@ -1,61 +0,0 @@
1From 29e2813a2ab7d5569860bb07892dfef7b5374d96 Mon Sep 17 00:00:00 2001
2From: Yaxun Liu <Yaxun.Liu@amd.com>
3Date: Tue, 26 Feb 2019 16:20:41 +0000
4Subject: [PATCH] [OpenCL] Fix assertion due to blocks
5
6A recent change caused assertion in CodeGenFunction::EmitBlockCallExpr when a block is called.
7
8There is code
9
10 Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
11getCalleeDecl calls Expr::getReferencedDeclOfCallee, which does not handle
12BlockExpr and returns nullptr, which causes isa to assert.
13
14This patch fixes that.
15
16Differential Revision: https://reviews.llvm.org/D58658
17
18
19git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354893 91177308-0d34-0410-b5e6-96231b3b80d8
20
21Upstream-Status: Backport
22[https://github.com/llvm-mirror/clang/commit/29e2813a2ab7d5569860bb07892dfef7b5374d96]
23Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
24---
25 lib/AST/Expr.cpp | 2 ++
26 test/CodeGenOpenCL/blocks.cl | 6 ++++++
27 2 files changed, 8 insertions(+)
28
29diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
30index aef1eab..85690c7 100644
31--- a/lib/AST/Expr.cpp
32+++ b/lib/AST/Expr.cpp
33@@ -1358,6 +1358,8 @@ Decl *Expr::getReferencedDeclOfCallee() {
34 return DRE->getDecl();
35 if (MemberExpr *ME = dyn_cast<MemberExpr>(CEE))
36 return ME->getMemberDecl();
37+ if (auto *BE = dyn_cast<BlockExpr>(CEE))
38+ return BE->getBlockDecl();
39
40 return nullptr;
41 }
42diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
43index ab5a2c6..c3e2685 100644
44--- a/test/CodeGenOpenCL/blocks.cl
45+++ b/test/CodeGenOpenCL/blocks.cl
46@@ -90,6 +90,12 @@ int get42() {
47 return blockArgFunc(^{return 42;});
48 }
49
50+// COMMON-LABEL: define {{.*}}@call_block
51+// call {{.*}}@__call_block_block_invoke
52+int call_block() {
53+ return ^int(int num) { return num; } (11);
54+}
55+
56 // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size"
57 // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align"
58
59--
601.8.3.1
61
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
deleted file mode 100644
index f536f0f2..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
+++ /dev/null
@@ -1,16 +0,0 @@
1FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:"
2
3SRC_URI_append_intel-x86-common = " \
4 git://github.com/intel/opencl-clang.git;protocol=https;branch=ocl-open-80;destsuffix=git/llvm/projects/opencl-clang;name=opencl-clang \
5 git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_80;destsuffix=git/llvm/projects/llvm-spirv;name=spirv \
6 file://0001-point-to-correct-clang.patch;patchdir=llvm/projects/opencl-clang \
7 file://0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch;patchdir=clang \
8 file://0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch;patchdir=clang \
9 file://0003-OpenCL-Fix-assertion-due-to-blocks.patch;patchdir=clang \
10 file://0001-dont-export-targets-for-binaries.patch \
11 file://0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch;patchdir=llvm/projects/llvm-spirv \
12 "
13
14SRCREV_opencl-clang = "daf5e4dd718477ae8cf89a283c653939d9182f15"
15SRCREV_spirv = "bd0f28fb92061d49c0f120b4dac3fd8956006745"
16
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/0002-Build-not-able-to-locate-cpp_generation_tool.patch b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/0002-Build-not-able-to-locate-cpp_generation_tool.patch
new file mode 100644
index 00000000..45288ce1
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/0002-Build-not-able-to-locate-cpp_generation_tool.patch
@@ -0,0 +1,41 @@
1From 8c330d0cb5167612296801f0202b0de35e9ca88d Mon Sep 17 00:00:00 2001
2From: Dongwon Kim <dongwon.kim@intel.com>
3Date: Sat, 21 Aug 2021 16:09:39 -0700
4Subject: [PATCH 2/5] Build not able to locate cpp_generation_tool.
5
6Upstream-Status: Inappropriate [oe specific]
7
8Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
9Signed-off-by: Dongwon Kim <dongwon.kim@intel.com>
10---
11 shared/source/built_ins/kernels/CMakeLists.txt | 10 +++++-----
12 1 file changed, 5 insertions(+), 5 deletions(-)
13
14Index: git/shared/source/built_ins/kernels/CMakeLists.txt
15===================================================================
16--- git.orig/shared/source/built_ins/kernels/CMakeLists.txt
17+++ git/shared/source/built_ins/kernels/CMakeLists.txt
18@@ -122,9 +122,9 @@ function(compile_builtin core_type platf
19 endif()
20 add_custom_command(
21 OUTPUT ${OUTPUT_FILE_CPP}
22- COMMAND $<TARGET_FILE:cpp_generate_tool> --file ${BINARY_OUTPUT}.bin --output ${OUTPUT_FILE_CPP} --array ${mode}_${BASENAME} --device ${RELEASE_FILENAME}
23+ COMMAND cpp_generate_tool --file ${BINARY_OUTPUT}.bin --output ${OUTPUT_FILE_CPP} --array ${mode}_${BASENAME} --device ${RELEASE_FILENAME}
24 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
25- DEPENDS ${OUTPUT_FILES_BINARIES} $<TARGET_FILE:cpp_generate_tool>
26+ DEPENDS ${OUTPUT_FILES_BINARIES} cpp_generate_tool
27 )
28 list(APPEND BUILTINS_COMMANDS "${OUTPUT_FILE_CPP}")
29 else()
30@@ -176,9 +176,9 @@ function(generate_cpp_spirv builtin)
31 endif()
32 add_custom_command(
33 OUTPUT ${OUTPUT_FILE_CPP}
34- COMMAND $<TARGET_FILE:cpp_generate_tool> --file ${GENERATED_SPV_INPUT} --output ${OUTPUT_FILE_CPP} --array ${BASENAME}
35+ COMMAND cpp_generate_tool --file ${GENERATED_SPV_INPUT} --output ${OUTPUT_FILE_CPP} --array ${BASENAME}
36 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
37- DEPENDS ${GENERATED_SPV_INPUT} $<TARGET_FILE:cpp_generate_tool>
38+ DEPENDS ${GENERATED_SPV_INPUT} cpp_generate_tool
39 )
40 set(OUTPUT_LIST_CPP_FILES ${OUTPUT_LIST_CPP_FILES} ${OUTPUT_FILE_CPP} PARENT_SCOPE)
41 else()
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/0003-external-ocloc.patch b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/0003-external-ocloc.patch
new file mode 100644
index 00000000..2001d839
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/0003-external-ocloc.patch
@@ -0,0 +1,38 @@
1From 0006db5f55a9f08bd3452558a53704cd3bbb790f Mon Sep 17 00:00:00 2001
2From: Dongwon Kim <dongwon.kim@intel.com>
3Date: Wed, 2 Mar 2022 15:52:45 -0800
4Subject: [PATCH 3/5] external ocloc
5
6Upstream-Status: Inappropriate
7
8Signed-off-by: Dongwon Kim <dongwon.kim@intel.com>
9---
10 cmake/ocloc_cmd_prefix.cmake | 14 ++++++++------
11 1 file changed, 8 insertions(+), 6 deletions(-)
12
13Index: git/cmake/ocloc_cmd_prefix.cmake
14===================================================================
15--- git.orig/cmake/ocloc_cmd_prefix.cmake
16+++ git/cmake/ocloc_cmd_prefix.cmake
17@@ -4,13 +4,15 @@
18 # SPDX-License-Identifier: MIT
19 #
20
21-if(WIN32)
22- set(ocloc_cmd_prefix ocloc)
23-else()
24- if(DEFINED NEO__IGC_LIBRARY_PATH)
25- set(ocloc_cmd_prefix ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${NEO__IGC_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib>" $<TARGET_FILE:ocloc>)
26+if(NOT DEFINED ocloc_cmd_prefix)
27+ if(WIN32)
28+ set(ocloc_cmd_prefix ocloc)
29 else()
30- set(ocloc_cmd_prefix ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib>" $<TARGET_FILE:ocloc>)
31+ if(DEFINED NEO__IGC_LIBRARY_PATH)
32+ set(ocloc_cmd_prefix LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${NEO__IGC_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib> $<TARGET_FILE:ocloc>)
33+ else()
34+ set(ocloc_cmd_prefix LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib> $<TARGET_FILE:ocloc>)
35+ endif()
36 endif()
37 endif()
38
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime_25.13.33276.16.bb b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime_25.13.33276.16.bb
new file mode 100644
index 00000000..b13daaa8
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime_25.13.33276.16.bb
@@ -0,0 +1,61 @@
1SUMMARY = "The Intel(R) Graphics Compute Runtime for OpenCL(TM)"
2DESCRIPTION = "The Intel(R) Graphics Compute Runtime for OpenCL(TM) \
3is an open source project to converge Intel's development efforts \
4on OpenCL(TM) compute stacks supporting the GEN graphics hardware \
5architecture."
6
7LICENSE = "MIT & Apache-2.0"
8LIC_FILES_CHKSUM = "file://LICENSE.md;md5=eca6ec6997e18db166db7109cdbe611c \
9 file://third_party/opencl_headers/LICENSE;md5=3b83ef96387f14655fc854ddc3c6bd57"
10
11SRC_URI = "git://github.com/intel/compute-runtime.git;protocol=https;branch=releases/25.13 \
12 file://0002-Build-not-able-to-locate-cpp_generation_tool.patch \
13 file://0003-external-ocloc.patch \
14 "
15
16SRCREV = "a9961bdfaa07250fd52ff930bf8f31fb4e3b7799"
17
18DEPENDS += " intel-graphics-compiler gmmlib libva qemu-native"
19
20RDEPENDS:${PN} += " intel-graphics-compiler gmmlib"
21
22inherit cmake pkgconfig qemu
23
24COMPATIBLE_HOST = '(x86_64).*-linux'
25COMPATIBLE_HOST:libc-musl = "null"
26
27EXTRA_OECMAKE = " \
28 -DIGC_DIR=${STAGING_INCDIR}/igc \
29 -DBUILD_TYPE=Release \
30 -DSKIP_UNIT_TESTS=1 \
31 -DCCACHE_ALLOWED=FALSE \
32 -DNEO_DISABLE_LD_LLD=ON \
33 -DNEO_DISABLE_LD_GOLD=ON \
34 "
35
36EXTRA_OECMAKE:append:class-target = " \
37 -Docloc_cmd_prefix=ocloc \
38 -DCMAKE_CROSSCOMPILING_EMULATOR=${WORKDIR}/qemuwrapper \
39 "
40
41PACKAGECONFIG ??= ""
42PACKAGECONFIG[levelzero] = "-DBUILD_WITH_L0=ON, -DBUILD_WITH_L0=OFF, level-zero"
43
44do_configure:prepend:class-target () {
45 # Write out a qemu wrapper that will be used by cmake.
46 qemu_binary="${@qemu_wrapper_cmdline(d, d.getVar('STAGING_DIR_HOST'), [d.expand('${B}/bin'),d.expand('${STAGING_DIR_HOST}${libdir}'),d.expand('${STAGING_DIR_HOST}${base_libdir}')])}"
47 cat > ${WORKDIR}/qemuwrapper << EOF
48#!/bin/sh
49$qemu_binary "\$@"
50EOF
51 chmod +x ${WORKDIR}/qemuwrapper
52}
53
54FILES:${PN} += " \
55 ${libdir}/intel-opencl/libigdrcl.so \
56 ${libdir}/libocloc.so \
57 "
58
59FILES:${PN}-dev = "${includedir}"
60
61UPSTREAM_CHECK_GITTAGREGEX = "(?P<pver>\d+(\.\d+)+)"
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch
new file mode 100644
index 00000000..456a8c65
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch
@@ -0,0 +1,32 @@
1From 1b98a931c3bf8daccc48cd618335ff35e3d382da Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Tue, 12 Oct 2021 23:46:42 +0800
4Subject: [PATCH] BiF/CMakeLists.txt: remove opt from DEPENDS
5
6Otherwise it starts failing with:
7
8| ninja: error: 'IGC/VectorCompiler/lib/BiF/opt', needed by 'IGC/VectorCompiler/lib/BiF/VCBiFPrintfOCL32.opt.bc', missing and no known rule to make it
9
10We don't need to explicitly make sure opt is built when
11using prebuilt binaries.
12
13Upstream-Status: Inappropriate
14
15Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
16---
17 IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake | 2 +-
18 1 file changed, 1 insertion(+), 1 deletion(-)
19
20Index: git/IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake
21===================================================================
22--- git.orig/IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake
23+++ git/IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake
24@@ -121,7 +121,7 @@ function(vc_build_bif RES_FILE CMCL_SRC_
25 COMMENT "vc_build_bif: Translating CMCL builtins: ${BIF_CLANG_BC_NAME_FINAL} -> ${BIF_OPT_BC_NAME}"
26 COMMAND CMCLTranslatorTool ${OPT_OPAQUE_ARG} -o ${BIF_CMCL_BC_PATH} ${BIF_CLANG_BC_PATH_FINAL}
27 COMMAND ${LLVM_OPT_EXE} ${OPT_OPAQUE_ARG} --O2 -o ${BIF_OPT_BC_PATH} ${BIF_CMCL_BC_PATH}
28- DEPENDS CMCLTranslatorTool ${LLVM_OPT_EXE} ${OPT_BC_DEPENDS})
29+ DEPENDS CMCLTranslatorTool ${BIF_CLANG_BC_PATH_FINAL})
30
31 add_custom_target(${TARGET_NAME}
32 DEPENDS ${BIF_OPT_BC_PATH}
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-Build-not-able-to-locate-BiFManager-bin.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-Build-not-able-to-locate-BiFManager-bin.patch
new file mode 100644
index 00000000..87b094aa
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-Build-not-able-to-locate-BiFManager-bin.patch
@@ -0,0 +1,27 @@
1From 048512728eea53b3772a3f80ac9743bfc462487e Mon Sep 17 00:00:00 2001
2From: Yogesh Tyagi <yogesh.tyagi@intel.com>
3Date: Thu, 2 Jan 2025 15:59:27 +0530
4Subject: [PATCH] Build not able to locate BiFManager-bin
5
6Upstream-Status: Inappropriate [oe specific]
7
8Signed-off-by: Yogesh Tyagi <yogesh.tyagi@intel.com>
9---
10 IGC/BiFModule/CMakeLists.txt | 4 ++--
11 1 file changed, 2 insertions(+), 2 deletions(-)
12
13Index: git/IGC/BiFModule/CMakeLists.txt
14===================================================================
15--- git.orig/IGC/BiFModule/CMakeLists.txt
16+++ git/IGC/BiFModule/CMakeLists.txt
17@@ -655,8 +655,8 @@ set(IGC_BUILD__PROJ__BiFModuleCache_OCL
18
19 add_custom_command(
20 OUTPUT "${IGC_BUILD__BIF_DIR}/OCLBiFImpl.h" "${IGC_BUILD__BIF_DIR}/OCLBiFImpl.bifbc"
21- COMMAND $<TARGET_FILE:BiFManager-bin> "${IGC_BUILD__BIF_DIR}/OCLBiFImpl.bc" "${IGC_BUILD__BIF_DIR}/IGCsize_t_32.bc" "${IGC_BUILD__BIF_DIR}/IGCsize_t_64.bc" "${IGC_BUILD__BIF_DIR}/OCLBiFImpl.bifbc" "${IGC_BUILD__BIF_DIR}/OCLBiFImpl.h"
22- DEPENDS "${IGC_BUILD__BIF_DIR}/OCLBiFImpl.bc" "${IGC_BUILD__BIF_DIR}/IGCsize_t_32.bc" "${IGC_BUILD__BIF_DIR}/IGCsize_t_64.bc"$<TARGET_FILE:BiFManager-bin>
23+ COMMAND BiFManager-bin "${IGC_BUILD__BIF_DIR}/OCLBiFImpl.bc" "${IGC_BUILD__BIF_DIR}/IGCsize_t_32.bc" "${IGC_BUILD__BIF_DIR}/IGCsize_t_64.bc" "${IGC_BUILD__BIF_DIR}/OCLBiFImpl.bifbc" "${IGC_BUILD__BIF_DIR}/OCLBiFImpl.h"
24+ DEPENDS "${IGC_BUILD__BIF_DIR}/OCLBiFImpl.bc" "${IGC_BUILD__BIF_DIR}/IGCsize_t_32.bc" "${IGC_BUILD__BIF_DIR}/IGCsize_t_64.bc" BiFManager-bin
25 COMMENT "BiF: ${IGC_BUILD__BIF_DIR}/OCLBiFImpl.bc: Spliting output .bc."
26 COMMAND_EXPAND_LISTS
27 )
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch
new file mode 100644
index 00000000..fc66b71d
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch
@@ -0,0 +1,30 @@
1From 251e2854dd206ebf66e5908d3277e4585fe2a63b Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Mon, 9 Jan 2023 11:43:05 +0800
4Subject: [PATCH] external/SPIRV-Tools: change path to tools and headers
5
6We clone the SPIRV headers and tools in a different directory to ensure
7file path substitutions take place.
8
9Upstream-Status: Inappropriate
10
11Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
12---
13 external/SPIRV-Tools/CMakeLists.txt | 4 ++--
14 1 file changed, 2 insertions(+), 2 deletions(-)
15
16Index: git/external/SPIRV-Tools/CMakeLists.txt
17===================================================================
18--- git.orig/external/SPIRV-Tools/CMakeLists.txt
19+++ git/external/SPIRV-Tools/CMakeLists.txt
20@@ -45,8 +45,8 @@ else() #By default use build from source
21 message(STATUS "[SPIRV-Tools] : Building from source")
22 message(STATUS "[SPIRV-Tools] : Current source dir: ${CMAKE_CURRENT_SOURCE_DIR}")
23
24- set(SPIRV-Headers_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../SPIRV-Headers") # used in subdirectory
25- set(SPIRV-Tools_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../SPIRV-Tools")
26+ set(SPIRV-Headers_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../SPIRV-Headers") # used in subdirectory
27+ set(SPIRV-Tools_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../SPIRV-Tools")
28
29 set(SPIRV-Tools_OUTPUT_DIR "${IGC_OPTION__OUTPUT_DIR}/external/SPIRV-Tools/build")
30 set(IGC_BUILD__SPIRV-Headers_DIR "${SPIRV-Headers_SOURCE_DIR}")
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-fix-tblgen.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-fix-tblgen.patch
new file mode 100644
index 00000000..3d9ae02f
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-fix-tblgen.patch
@@ -0,0 +1,23 @@
1From 1641dc87b2ed6b6b87b2cef824e4d66da65b0b30 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Thu, 19 May 2022 22:50:09 +0800
4Subject: [PATCH] fix tblgen
5
6Upstream-Status: Inappropriate [OE specific]
7---
8 IGC/cmake/igc_llvm.cmake | 2 +-
9 1 file changed, 1 insertion(+), 1 deletion(-)
10
11diff --git a/IGC/cmake/igc_llvm.cmake b/IGC/cmake/igc_llvm.cmake
12index b708cc904..fe4668890 100644
13--- a/IGC/cmake/igc_llvm.cmake
14+++ b/IGC/cmake/igc_llvm.cmake
15@@ -53,7 +53,7 @@ else()
16 set(LLVM_OPT_EXE "opt" CACHE STRING "")
17
18 set(LLVM_TABLEGEN_EXE "llvm-tblgen")
19- if(CMAKE_CROSSCOMPILING)
20+ if(TRUE)
21 if(DEFINED LLVM_TABLEGEN)
22 set(LLVM_TABLEGEN_EXE ${LLVM_TABLEGEN})
23 else()
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0003-Improve-Reproducibility-for-src-package.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0003-Improve-Reproducibility-for-src-package.patch
new file mode 100644
index 00000000..4269fadf
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0003-Improve-Reproducibility-for-src-package.patch
@@ -0,0 +1,33 @@
1From ca136c04d4ac60e3febc8ea2b9c4d4736365a424 Mon Sep 17 00:00:00 2001
2From: Lee Chee Yang <chee.yang.lee@intel.com>
3Date: Wed, 2 Sep 2020 08:28:35 +0800
4Subject: [PATCH] Improve Reproducibility for src package
5
6Improve reproducibility for intel-graphics-compiler-src package.
7needs to pass build path as environment variable to the build.
8this only works on bison 3.7 onward, hence check for bison version
9before adding the flags.
10Upstream-Status: Inappropriate [applying --file-prefix-map in such way does not work for upstream]
11Signed-off-by: Lee Chee Yang <chee.yang.lee@intel.com>
12---
13 visa/CMakeLists.txt | 7 +++++--
14 1 file changed, 5 insertions(+), 2 deletions(-)
15
16Index: git/visa/CMakeLists.txt
17===================================================================
18--- git.orig/visa/CMakeLists.txt
19+++ git/visa/CMakeLists.txt
20@@ -135,8 +135,11 @@ endif()
21 set(bison_output_file ${CMAKE_CURRENT_BINARY_DIR}/CISA.tab.cpp)
22 set(flex_output_file ${CMAKE_CURRENT_BINARY_DIR}/lex.CISA.cpp)
23
24-BISON_TARGET(CISAParser CISA.y ${bison_output_file} COMPILE_FLAGS "-vt -p CISA")
25-FLEX_TARGET(CISAScanner CISA.l ${flex_output_file} COMPILE_FLAGS "-PCISA ${WIN_FLEX_FLAG}")
26+if(BISON_VERSION VERSION_GREATER_EQUAL "3.7.0")
27+ set(BISON_EXTRA_FLAGS " --file-prefix-map=$ENV{B}=/igc/ ")
28+endif()
29+BISON_TARGET(CISAParser CISA.y ${bison_output_file} COMPILE_FLAGS "-l -vt -p CISA ${BISON_EXTRA_FLAGS} ")
30+FLEX_TARGET(CISAScanner CISA.l ${flex_output_file} COMPILE_FLAGS "-PCISA -L ${WIN_FLEX_FLAG} ")
31 ADD_FLEX_BISON_DEPENDENCY(CISAScanner CISAParser)
32 set(CISAScanner_dependencies)
33
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_2.10.10.bb b/dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_2.10.10.bb
new file mode 100644
index 00000000..05611cbf
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_2.10.10.bb
@@ -0,0 +1,77 @@
1SUMMARY = "The Intel(R) Graphics Compiler for OpenCL(TM)"
2DESCRIPTION = "The Intel(R) Graphics Compiler for OpenCL(TM) is an \
3llvm based compiler for OpenCL(TM) targeting Intel Gen graphics \
4hardware architecture."
5
6LICENSE = "MIT & Apache-2.0"
7LIC_FILES_CHKSUM = "file://IGC/BiFModule/Implementation/ExternalLibraries/libclc/LICENSE.TXT;md5=311cfc1a5b54bab8ed34a0b5fba4373e \
8 file://LICENSE.md;md5=488d74376edf2765f6e78d271543dde3 \
9 file://NOTICES.txt;md5=b81a52411c84df3419f20bad4d755880"
10
11SRC_URI = "git://github.com/intel/intel-graphics-compiler.git;protocol=https;name=igc;branch=releases/2.10.x \
12 git://github.com/intel/vc-intrinsics.git;protocol=https;destsuffix=${BB_GIT_DEFAULT_DESTSUFFIX}/vc-intrinsics;name=vc;nobranch=1 \
13 git://github.com/KhronosGroup/SPIRV-Tools.git;protocol=https;destsuffix=${BB_GIT_DEFAULT_DESTSUFFIX}/SPIRV-Tools;name=spirv-tools;branch=main \
14 git://github.com/KhronosGroup/SPIRV-Headers.git;protocol=https;destsuffix=${BB_GIT_DEFAULT_DESTSUFFIX}/SPIRV-Headers;name=spirv-headers;branch=main \
15 file://0003-Improve-Reproducibility-for-src-package.patch \
16 file://0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch \
17 file://0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch \
18 file://0001-Build-not-able-to-locate-BiFManager-bin.patch \
19 "
20
21SRC_URI:append:class-native = " file://0001-fix-tblgen.patch"
22
23SRCREV_igc = "83925314d4fc32b017fcbfcd73e0667ba833fb8f"
24SRCREV_vc = "9d255266e1df8f1dc5d11e1fbb03213acfaa4fc7"
25SRCREV_spirv-tools = "f289d047f49fb60488301ec62bafab85573668cc"
26SRCREV_spirv-headers = "0e710677989b4326ac974fd80c5308191ed80965"
27
28SRCREV_FORMAT = "igc_vc_spirv-tools_spirv-headers"
29
30# Used to replace with relative path in reproducibility patch
31export B
32
33inherit cmake pkgconfig qemu python3native
34
35CXXFLAGS:append = " -Wno-error=nonnull"
36
37COMPATIBLE_HOST = '(x86_64).*-linux'
38COMPATIBLE_HOST:libc-musl = "null"
39
40DEPENDS += " flex-native bison-native clang clang-cross-x86_64 opencl-clang qemu-native python3-mako-native \
41 python3-pyyaml-native \
42 "
43
44RDEPENDS:${PN} += "opencl-clang"
45
46PACKAGECONFIG ??= "vc"
47PACKAGECONFIG[vc] = "-DIGC_BUILD__VC_ENABLED=ON -DIGC_OPTION__LINK_KHRONOS_SPIRV_TRANSLATOR=ON -DIGC_OPTION__SPIRV_TRANSLATOR_MODE=Prebuilds,-DIGC_BUILD__VC_ENABLED=OFF,"
48
49EXTRA_OECMAKE = " \
50 -DIGC_OPTION__LLVM_PREFERRED_VERSION=${LLVMVERSION} \
51 -DVC_INTRINSICS_SRC="${S}/vc-intrinsics" \
52 -DIGC_OPTION__LLVM_MODE=Prebuilds \
53 -DLLVM_TABLEGEN=${STAGING_BINDIR_NATIVE}/llvm-tblgen \
54 -DLLVM_LINK_EXE=${STAGING_BINDIR_NATIVE}/llvm-link \
55 -DCLANG_EXE=${STAGING_BINDIR_NATIVE}/clang \
56 -DCMAKE_CROSSCOMPILING_EMULATOR=${WORKDIR}/qemuwrapper \
57 "
58
59do_configure:prepend:class-target () {
60 # Write out a qemu wrapper that will be used by cmake.
61 qemu_binary="${@qemu_wrapper_cmdline(d, d.getVar('STAGING_DIR_HOST'), [d.expand('${STAGING_DIR_HOST}${libdir}'),d.expand('${STAGING_DIR_HOST}${base_libdir}')])}"
62 cat > ${WORKDIR}/qemuwrapper << EOF
63#!/bin/sh
64$qemu_binary "\$@"
65EOF
66 chmod +x ${WORKDIR}/qemuwrapper
67}
68
69
70UPSTREAM_CHECK_GITTAGREGEX = "^v(?P<pver>\d+(\.\d+)+)$"
71
72FILES:${PN} += " \
73 ${libdir}/igc2/NOTICES.txt \
74 "
75
76# libigc.so contains buildpaths
77INSANE_SKIP:${PN} += "buildpaths"
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch
new file mode 100644
index 00000000..031a77c7
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch
@@ -0,0 +1,49 @@
1From 5aea653e611b59c70e529a1bd71885a509831557 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Tue, 1 Aug 2023 11:15:31 +0800
4Subject: [PATCH] cl_headers/CMakeLists.txt: use clang from native sysroot
5
6Allow clang to be found in target sysroot for target builds and dont try
7to compile cross binaries, we do that ourselves.
8
9Upstream-Status: Inappropriate [oe-specific]
10Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
11---
12 CMakeLists.txt | 8 ++++----
13 cl_headers/CMakeLists.txt | 2 +-
14 2 files changed, 5 insertions(+), 5 deletions(-)
15
16diff --git a/CMakeLists.txt b/CMakeLists.txt
17index 5864009..60ba39e 100644
18--- a/CMakeLists.txt
19+++ b/CMakeLists.txt
20@@ -35,10 +35,10 @@ set(CMAKE_MODULE_PATH
21
22 include(CMakeFunctions)
23
24-if(CMAKE_CROSSCOMPILING AND OPENCL_CLANG_BUILD_EXTERNAL)
25- include(CrossCompile)
26- llvm_create_cross_target(${PROJECT_NAME} NATIVE "" Release)
27-endif()
28+#if(CMAKE_CROSSCOMPILING AND OPENCL_CLANG_BUILD_EXTERNAL)
29+# include(CrossCompile)
30+# llvm_create_cross_target(${PROJECT_NAME} NATIVE "" Release)
31+#endif()
32
33 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
34 set(USE_PREBUILT_LLVM ON)
35diff --git a/cl_headers/CMakeLists.txt b/cl_headers/CMakeLists.txt
36index 16cabb7..4423536 100644
37--- a/cl_headers/CMakeLists.txt
38+++ b/cl_headers/CMakeLists.txt
39@@ -1,6 +1,6 @@
40 set(CL_HEADERS_LIB cl_headers)
41 if(USE_PREBUILT_LLVM)
42- find_program(CLANG_COMMAND clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
43+ find_program(CLANG_COMMAND clang PATHS ${LLVM_TOOLS_BINARY_DIR})
44 else()
45 set(CLANG_COMMAND $<TARGET_FILE:clang>)
46 endif()
47--
482.37.3
49
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-Request-native-clang-only-when-cross-compiling-464.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-Request-native-clang-only-when-cross-compiling-464.patch
new file mode 100644
index 00000000..2f1814f8
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-Request-native-clang-only-when-cross-compiling-464.patch
@@ -0,0 +1,60 @@
1From 43c806ef321b1f677a49d28c89fb7ffecf539c2d Mon Sep 17 00:00:00 2001
2From: Tim Creech <timothy.m.creech@intel.com>
3Date: Wed, 28 Jun 2023 03:45:51 -0400
4Subject: [PATCH 2/2] Request native clang only when cross-compiling (#464)
5
6* Request native clang only when cross-compiling
7
8LLVM_USE_HOST_TOOLS may be set if LLVM is configured with
9LLVM_OPTIMIZED_TABLEGEN, which does not necessarily indicate
10cross-compilation or that clang will only execute on the target.
11
12By checking that CMAKE_CROSSCOMPILING is set, we ensure that we only
13build/use clang again if necessary for host execution.
14
15* fixup: CMAKE_CROSSCOMPILING implies LLVM_USE_HOST_TOOLS
16
17Co-authored-by: Wenju He <wenju.he@intel.com>
18
19* fixup: also use CMAKE_CROSSCOMPILING in top-level CMakeLists.txt
20
21---------
22
23Co-authored-by: Wenju He <wenju.he@intel.com>
24
25Upstream-Status: Backport [https://github.com/intel/opencl-clang/commit/53843eee13cfb2357919ee02714a43bef1af0f86]
26Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
27---
28 CMakeLists.txt | 2 +-
29 cl_headers/CMakeLists.txt | 2 +-
30 2 files changed, 2 insertions(+), 2 deletions(-)
31
32diff --git a/CMakeLists.txt b/CMakeLists.txt
33index e772de9..5864009 100644
34--- a/CMakeLists.txt
35+++ b/CMakeLists.txt
36@@ -35,7 +35,7 @@ set(CMAKE_MODULE_PATH
37
38 include(CMakeFunctions)
39
40-if(LLVM_USE_HOST_TOOLS AND OPENCL_CLANG_BUILD_EXTERNAL)
41+if(CMAKE_CROSSCOMPILING AND OPENCL_CLANG_BUILD_EXTERNAL)
42 include(CrossCompile)
43 llvm_create_cross_target(${PROJECT_NAME} NATIVE "" Release)
44 endif()
45diff --git a/cl_headers/CMakeLists.txt b/cl_headers/CMakeLists.txt
46index 18296c2..16cabb7 100644
47--- a/cl_headers/CMakeLists.txt
48+++ b/cl_headers/CMakeLists.txt
49@@ -4,7 +4,7 @@ if(USE_PREBUILT_LLVM)
50 else()
51 set(CLANG_COMMAND $<TARGET_FILE:clang>)
52 endif()
53-if(LLVM_USE_HOST_TOOLS AND NOT OPENCL_CLANG_BUILD_EXTERNAL)
54+if(CMAKE_CROSSCOMPILING AND NOT OPENCL_CLANG_BUILD_EXTERNAL)
55 build_native_tool(clang CLANG_COMMAND)
56 endif()
57
58--
592.37.3
60
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang.inc b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang.inc
new file mode 100644
index 00000000..726b035d
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang.inc
@@ -0,0 +1,33 @@
1SUMMARY = "Common clang is a thin wrapper library around clang"
2DESCRIPTION = "Common clang has OpenCL-oriented API and is capable \
3 to compile OpenCL C kernels to SPIR-V modules."
4
5LICENSE = "NCSA"
6LIC_FILES_CHKSUM = "file://LICENSE;md5=e8a15bf1416762a09ece07e44c79118c"
7
8SRC_URI = "git://github.com/intel/opencl-clang.git;branch=${BRANCH};protocol=https \
9 file://0002-Request-native-clang-only-when-cross-compiling-464.patch \
10 file://0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch \
11 "
12
13inherit cmake
14DEPENDS += "clang"
15DEPENDS:append:class-target = " opencl-clang-native"
16
17COMPATIBLE_HOST = '(x86_64).*-linux'
18COMPATIBLE_HOST:libc-musl = "null"
19
20DEPENDS += " spirv-llvm-translator"
21
22EXTRA_OECMAKE += "\
23 -DLLVM_TABLEGEN_EXE=${STAGING_BINDIR_NATIVE}/llvm-tblgen \
24 -DCMAKE_SKIP_RPATH=TRUE \
25 -DPREFERRED_LLVM_VERSION=${LLVMVERSION} \
26 "
27
28do_install:append:class-native() {
29 install -d ${D}${bindir}
30 install -m 0755 ${B}/bin/linux_resource_linker ${D}${bindir}/
31}
32
33BBCLASSEXTEND = "native nativesdk"
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_15.0.0.bb b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_15.0.0.bb
new file mode 100644
index 00000000..e946c31c
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_15.0.0.bb
@@ -0,0 +1,5 @@
1require opencl-clang.inc
2
3SRCREV = "60fd799cc58755c16d951f9ebfde6d0f9b8554dd"
4
5BRANCH = "ocl-open-150"
diff --git a/dynamic-layers/openembedded-layer/recipes-bsp/amt/files/0001-LMS-fix-build-issue-with-gcc-15.patch b/dynamic-layers/openembedded-layer/recipes-bsp/amt/files/0001-LMS-fix-build-issue-with-gcc-15.patch
new file mode 100644
index 00000000..751c7973
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-bsp/amt/files/0001-LMS-fix-build-issue-with-gcc-15.patch
@@ -0,0 +1,32 @@
1From 439af27f7641185933d7810b6c4eb17086438df3 Mon Sep 17 00:00:00 2001
2From: Yogesh Tyagi <yogesh.tyagi@intel.com>
3Date: Mon, 19 May 2025 17:50:40 +0530
4Subject: [PATCH] LMS : fix build issue with gcc 15
5
6include cstdint header to resolve the below error with gcc 15
7
8| In file included from /lms/2406.0.0.0/git/MEIClient/src/MEICommand.cpp:11:
9| /lms/2406.0.0.0/git/MEIClient/Include/MEICommand.h:40:54: error: 'uint8_t' was not declared in this scope
10
11Upstream-Status: Submitted [https://github.com/intel/lms/pull/23]
12
13Signed-off-by: Yogesh Tyagi <yogesh.tyagi@intel.com>
14---
15 MEIClient/Include/MEICommand.h | 1 +
16 1 file changed, 1 insertion(+)
17
18diff --git a/MEIClient/Include/MEICommand.h b/MEIClient/Include/MEICommand.h
19index 6192d26..5332e44 100644
20--- a/MEIClient/Include/MEICommand.h
21+++ b/MEIClient/Include/MEICommand.h
22@@ -12,6 +12,7 @@
23 #define __MEI_COMMAND_H__
24 #include "heci.h"
25 #include "MEIClientException.h"
26+#include <cstdint>
27 #include <memory>
28 #include <vector>
29
30--
312.43.0
32
diff --git a/dynamic-layers/openembedded-layer/recipes-bsp/amt/files/lms_drop_rpath_1921.0.0.0.diff b/dynamic-layers/openembedded-layer/recipes-bsp/amt/files/lms_drop_rpath_1921.0.0.0.diff
deleted file mode 100644
index b68924a5..00000000
--- a/dynamic-layers/openembedded-layer/recipes-bsp/amt/files/lms_drop_rpath_1921.0.0.0.diff
+++ /dev/null
@@ -1,31 +0,0 @@
1From b92a5bda8015454a570990a3d9c4fba87010f1af Mon Sep 17 00:00:00 2001
2From: Alexander Usyskin <alexander.usyskin@intel.com>
3Date: Mon, 17 Jun 2019 13:27:33 +0300
4Subject: [PATCH] lms: drop rpath definitions
5
6Yocto fail to compile with this definitions.
7
8Upstream-Status: Inappropriate [configuration]
9Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
10---
11 CMakeLists.txt | 4 ----
12 1 file changed, 4 deletions(-)
13
14diff --git a/CMakeLists.txt b/CMakeLists.txt
15index 04257fb..85d0445 100644
16--- a/CMakeLists.txt
17+++ b/CMakeLists.txt
18@@ -107,10 +107,6 @@ install (DIRECTORY Docs/Licenses
19 FILES_MATCHING PATTERN "LICENSE.*"
20 )
21
22-list (APPEND CMAKE_INSTALL_RPATH "${PROJECT_BINARY_DIR}/UNS/GMS_COMMON")
23-list (APPEND CMAKE_INSTALL_RPATH "${PROJECT_BINARY_DIR}/WsmanClient")
24-list (APPEND CMAKE_INSTALL_RPATH "${PROJECT_BINARY_DIR}/UNS/StatusEventHandler")
25-
26 if (BUILD_TESTS)
27 include (gtest.cmake)
28 endif (BUILD_TESTS)
29--
302.7.4
31
diff --git a/dynamic-layers/openembedded-layer/recipes-bsp/amt/lms_1921.0.0.0.bb b/dynamic-layers/openembedded-layer/recipes-bsp/amt/lms_1921.0.0.0.bb
deleted file mode 100644
index 8ed74486..00000000
--- a/dynamic-layers/openembedded-layer/recipes-bsp/amt/lms_1921.0.0.0.bb
+++ /dev/null
@@ -1,39 +0,0 @@
1SUMMARY = "Intel(R) Local Managability Service"
2DESCRIPTION = "Intel Local Manageability Service allows applications \
3to access the Intel Active Management Technology (AMT) firmware via \
4the Intel Management Engine Interface (MEI)."
5
6LICENSE = "Apache-2.0"
7LIC_FILES_CHKSUM = "file://COPYING;md5=2ee41112a44fe7014dce33e26468ba93"
8
9COMPATIBLE_HOST = '(i.86|x86_64).*-linux'
10
11inherit cmake systemd
12
13DEPENDS = "metee ace xerces-c libnl libxml2 glib-2.0 glib-2.0-native connman"
14
15EXTRA_OECMAKE += "-DPYTHON_EXECUTABLE=${HOSTTOOLS_DIR}/python3"
16
17REQUIRED_DISTRO_FEATURES= "systemd"
18
19FILES_${PN} += "${datadir}/dbus-1/system-services/*.service"
20
21FILES_${PN} += "${libdir}/libLms*.so"
22FILES_SOLIBSDEV = ""
23INSANE_SKIP_${PN} += "dev-so"
24
25S = "${WORKDIR}/git"
26
27SYSTEMD_SERVICE_${PN} = "lms.service"
28
29SRC_URI = "git://github.com/intel/lms.git"
30SRCREV = "f7c374745ae7efb3ed7860fdc3f8abbb52dc9f8f"
31
32SRC_URI_append = " file://lms_drop_rpath_${PV}.diff"
33
34do_install_append() {
35 install -d ${D}${systemd_system_unitdir}
36 install -m 0644 ${B}/UNS/lms.service ${D}${systemd_system_unitdir}
37}
38
39RDEPENDS_${PN} += "ace"
diff --git a/dynamic-layers/openembedded-layer/recipes-bsp/amt/lms_2406.0.0.0.bb b/dynamic-layers/openembedded-layer/recipes-bsp/amt/lms_2406.0.0.0.bb
new file mode 100644
index 00000000..0a5a57ed
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-bsp/amt/lms_2406.0.0.0.bb
@@ -0,0 +1,43 @@
1SUMMARY = "Intel(R) Local Managability Service"
2DESCRIPTION = "Intel Local Manageability Service allows applications \
3to access the Intel Active Management Technology (AMT) firmware via \
4the Intel Management Engine Interface (MEI)."
5
6LICENSE = "Apache-2.0"
7LIC_FILES_CHKSUM = "file://COPYING;md5=2ee41112a44fe7014dce33e26468ba93"
8
9COMPATIBLE_HOST = '(i.86|x86_64).*-linux'
10
11COMPATIBLE_HOST:libc-musl = "null"
12
13inherit cmake systemd features_check python3native
14
15DEPENDS = "metee ace xerces-c libnl libxml2 glib-2.0 glib-2.0-native pkgconfig-native python3-packaging-native"
16
17# Enable either connman or networkmanager or none but not both.
18PACKAGECONFIG ??= "connman"
19PACKAGECONFIG[connman] = "-DNETWORK_CN=ON, -DNETWORK_CN=OFF, connman"
20PACKAGECONFIG[networkmanager] = "-DNETWORK_NM=ON, -DNETWORK_NM=OFF, networkmanager"
21
22REQUIRED_DISTRO_FEATURES = "systemd"
23
24FILES:${PN} += "${datadir}/dbus-1/system-services/*.service"
25
26SYSTEMD_SERVICE:${PN} = "lms.service"
27
28SRC_URI = "git://github.com/intel/lms.git;branch=master;protocol=https \
29 file://0001-LMS-fix-build-issue-with-gcc-15.patch \
30 "
31SRCREV = "388f115b2aeb3ea11499971c65f828daefd32c47"
32
33do_install:append() {
34 install -d ${D}${sysconfdir}/lms
35 install -d ${D}${systemd_system_unitdir}
36 install -m 0644 ${B}/UNS/lms.service ${D}${systemd_system_unitdir}
37 install -d ${D}${sysconfdir}/udev/rules.d
38 install -m 0644 ${S}/UNS/linux_scripts/70-mei-wdt.rules ${D}${sysconfdir}/udev/rules.d/70-mei-wdt.rules
39}
40
41RDEPENDS:${PN} += "ace"
42
43CVE_STATUS[CVE-2018-1000535] = "cpe-incorrect: This CVE is for a different LMS - Lan Management System."
diff --git a/dynamic-layers/openembedded-layer/recipes-bsp/thermald/thermald_2.5.8.bb b/dynamic-layers/openembedded-layer/recipes-bsp/thermald/thermald_2.5.8.bb
new file mode 100644
index 00000000..575a00e8
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-bsp/thermald/thermald_2.5.8.bb
@@ -0,0 +1,43 @@
1SUMMARY = "Linux thermal daemon"
2
3DESCRIPTION = "Thermal Daemon is a Linux daemon used to prevent the \
4overheating of platforms. This daemon monitors temperature and applies \
5compensation using available cooling methods."
6
7HOMEPAGE = "https://github.com/01org/thermal_daemon"
8
9DEPENDS = "dbus dbus-glib dbus-glib-native libxml2 glib-2.0 glib-2.0-native upower libevdev"
10DEPENDS += "autoconf-archive-native"
11
12LICENSE = "GPL-2.0-only"
13LIC_FILES_CHKSUM = "file://COPYING;md5=ea8831610e926e2e469075b52bf08848"
14
15SRC_URI = "git://github.com/intel/thermal_daemon/;branch=master;protocol=https \
16 "
17
18SRCREV = "df3b9ab0ffe780c4fbad7750987eff76f659cfd5"
19
20inherit pkgconfig autotools systemd gtk-doc
21
22# gtkdocsize runs before autotools do_configure and it copies gtk-doc.m4 and fails
23# to copy becuase there is no m4 dir yet.
24do_configure:prepend () {
25 mkdir -p ${S}/m4
26}
27
28EXTRA_OECONF = " \
29 --with-systemdsystemunitdir=${systemd_system_unitdir} \
30 "
31
32FILES:${PN} += "${datadir}/dbus-1"
33
34SYSTEMD_SERVICE:${PN} = "thermald.service"
35
36COMPATIBLE_HOST = '(i.86|x86_64).*-linux'
37
38CONFFILES:${PN} = " \
39 ${sysconfdir}/thermald/thermal-conf.xml \
40 ${sysconfdir}/thermald/thermal-cpu-cdev-order.xml \
41 "
42
43UPSTREAM_CHECK_URI = "https://github.com/01org/thermal_daemon/releases"
diff --git a/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/0001-Add-print-function-to-print-test-run-status-in-ptest.patch b/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/0001-Add-print-function-to-print-test-run-status-in-ptest.patch
new file mode 100644
index 00000000..4d583657
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/0001-Add-print-function-to-print-test-run-status-in-ptest.patch
@@ -0,0 +1,53 @@
1From deccc0c69c2c8759c52885be8bdda54d3cee481c Mon Sep 17 00:00:00 2001
2From: Yogesh Tyagi <yogesh.tyagi@intel.com>
3Date: Sun, 11 Dec 2022 22:34:15 +0800
4Subject: [PATCH] Add print function to print test run status in ptest format
5
6Upstream-Status: Inappropriate [OE ptest specific]
7
8Signed-off-by: Yogesh Tyagi <yogesh.tyagi@intel.com>
9---
10 run_tests.py | 16 ++++++++++++++++
11 1 file changed, 16 insertions(+)
12
13diff --git a/run_tests.py b/run_tests.py
14index 1cd796dd..e3ffd1ab 100755
15--- a/run_tests.py
16+++ b/run_tests.py
17@@ -327,6 +327,9 @@ def run_test(testname, host, target):
18 else:
19 ispc_exe_rel = add_prefix(host.ispc_cmd, host, target)
20
21+ # to reslove the error '.rodata' can not be used when making a PIE object
22+ ispc_exe_rel = ispc_exe_rel + " --pic"
23+
24 # is this a test to make sure an error is issued?
25 want_error = (filename.find("tests_errors") != -1)
26 if want_error == True:
27@@ -795,6 +798,17 @@ def check_compiler_exists(compiler_exe):
28 return
29 error("missing the required compiler: %s \n" % compiler_exe, 1)
30
31+def print_test_run_status(results):
32+ for fstatus in results:
33+ if (fstatus[1] == Status.Success):
34+ print( "%s: %s" % ("PASS", fstatus[0]))
35+ elif (fstatus[1] == Status.Compfail):
36+ print( "%s: %s" % ("FAIL", fstatus[0]))
37+ elif (fstatus[1] == Status.Runfail):
38+ print( "%s: %s" % ("FAIL", fstatus[0]))
39+ elif (fstatus[1] == Status.Skip):
40+ print( "%s: %s" % ("SKIP", fstatus[0]))
41+
42 def print_result(status, results, s, run_tests_log, csv):
43 title = StatusStr[status]
44 file_list = [fname for fname, fstatus in results if status == fstatus]
45@@ -938,6 +952,8 @@ def run_tests(options1, args, print_version):
46 pass_rate = -1
47 print_debug("PASSRATE (%d/%d) = %d%% \n\n" % (len(run_succeed_files), total_tests_executed, pass_rate), s, run_tests_log)
48
49+ print_test_run_status(results)
50+
51 for status in Status:
52 print_result(status, results, s, run_tests_log, options.csv)
53 fails = [status != Status.Compfail and status != Status.Runfail for _, status in results]
diff --git a/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/0001-Fix-QA-Issues.patch b/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/0001-Fix-QA-Issues.patch
new file mode 100644
index 00000000..b0a76ff9
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/0001-Fix-QA-Issues.patch
@@ -0,0 +1,36 @@
1From 7beff95c11071170eb27b6fa7d0cc77588caee8e Mon Sep 17 00:00:00 2001
2From: Yogesh Tyagi <yogesh.tyagi@intel.com>
3Date: Tue, 26 Jul 2022 15:25:10 +0800
4Subject: [PATCH] Fix QA Issues
5
6Stop ispc from inserting host file path in generated headers which leads to reproducibility problems.
7
8Upstream-Status: Inappropriate [OE build specific]
9
10Signed-off-by: Yogesh Tyagi <yogesh.tyagi@intel.com>
11---
12 src/module.cpp | 4 ++--
13 1 file changed, 2 insertions(+), 2 deletions(-)
14
15diff --git a/src/module.cpp b/src/module.cpp
16index e2084d2e..e2626865 100644
17--- a/src/module.cpp
18+++ b/src/module.cpp
19@@ -2555,7 +2555,7 @@ bool Module::writeHeader(const char *fn) {
20 perror("fopen");
21 return false;
22 }
23- fprintf(f, "//\n// %s\n// (Header automatically generated by the ispc compiler.)\n", fn);
24+ fprintf(f, "//\n// \n// (Header automatically generated by the ispc compiler.)\n");
25 fprintf(f, "// DO NOT EDIT THIS FILE.\n//\n\n");
26
27 // Create a nice guard string from the filename, turning any
28@@ -2677,7 +2677,7 @@ bool Module::writeDispatchHeader(DispatchHeaderInfo *DHI) {
29 FILE *f = DHI->file;
30
31 if (DHI->EmitFrontMatter) {
32- fprintf(f, "//\n// %s\n// (Header automatically generated by the ispc compiler.)\n", DHI->fn);
33+ fprintf(f, "//\n// \n// (Header automatically generated by the ispc compiler.)\n");
34 fprintf(f, "// DO NOT EDIT THIS FILE.\n//\n\n");
35 }
36 // Create a nice guard string from the filename, turning any
diff --git a/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/0002-cmake-don-t-build-for-32-bit-targets.patch b/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/0002-cmake-don-t-build-for-32-bit-targets.patch
new file mode 100644
index 00000000..f452dc50
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/0002-cmake-don-t-build-for-32-bit-targets.patch
@@ -0,0 +1,52 @@
1From 16a2c22339287122d2c25d8bb33a5a51b4e6ee51 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Thu, 24 Feb 2022 20:01:11 +0530
4Subject: [PATCH] cmake: don't build for 32-bit targets
5
6Error log:
7| tmp/work/corei7-64-poky-linux/ispc/1.16.0-r0/recipe-sysroot/usr/include/bits/long-double.h:23:10: fatal error: 'bits/long-double-32.h' file not found
8| #include <bits/long-double-32.h>
9| ^~~~~~~~~~~~~~~~~~~~~~~
10| 1 error generated.
11
12Remove SYSTEM include search path and set -isysroot dir path
13for root dir for cross compilation.
14
15Upstream-Status: Inappropriate
16
17Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
18---
19 cmake/GenerateBuiltins.cmake | 6 +++---
20 1 file changed, 3 insertions(+), 3 deletions(-)
21
22diff --git a/cmake/GenerateBuiltins.cmake b/cmake/GenerateBuiltins.cmake
23index f84494ed..d90cb1ec 100644
24--- a/cmake/GenerateBuiltins.cmake
25+++ b/cmake/GenerateBuiltins.cmake
26@@ -253,7 +253,7 @@ function(builtin_to_cpp bit os_name arch supported_archs supported_oses resultFi
27 # In this case headers will be installed in /usr/arm-linux-gnueabihf/include and will not be picked up
28 # by clang by default. So the following line adds such path explicitly. If this path doesn't exist and
29 # the headers can be found in other locations, this should not be a problem.
30- set(includePath -isystem/usr/${debian_triple}/include)
31+ set(includePath -isysroot${SYSROOT_DIR})
32 endif()
33 endif()
34
35@@ -339,7 +339,7 @@ function (generate_target_builtins resultList)
36 set(regular_targets ${ARGN})
37 list(FILTER regular_targets EXCLUDE REGEX wasm)
38 foreach (ispc_target ${regular_targets})
39- foreach (bit 32 64)
40+ foreach (bit 64)
41 foreach (os_name ${TARGET_OS_LIST_FOR_LL})
42 target_ll_to_cpp(target-${ispc_target} ${bit} ${os_name} output${os_name}${bit})
43 list(APPEND tmpList ${output${os_name}${bit}})
44@@ -405,7 +405,7 @@ function (generate_common_builtins resultList)
45 endif()
46
47 message (STATUS "ISPC will be built with support of ${supported_oses} for ${supported_archs}")
48- foreach (bit 32 64)
49+ foreach (bit 64)
50 foreach (os_name "windows" "linux" "freebsd" "macos" "android" "ios" "ps4" "web")
51 foreach (arch "x86" "arm" "wasm")
52 builtin_to_cpp(${bit} ${os_name} ${arch} "${supported_archs}" "${supported_oses}" res${bit}${os_name}${arch})
diff --git a/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/run-ptest b/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/run-ptest
new file mode 100644
index 00000000..77d13bb4
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc/run-ptest
@@ -0,0 +1,2 @@
1#!/bin/sh
2python3 run_tests.py
diff --git a/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc_1.24.0.bb b/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc_1.24.0.bb
new file mode 100644
index 00000000..f40e445c
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-core/ispc/ispc_1.24.0.bb
@@ -0,0 +1,62 @@
1SUMMARY = "Intel(R) Implicit SPMD Program Compiler"
2DESCRIPTION = "ispc is a compiler for a variant of the C programming language, \
3with extensions for single program, multiple data programming."
4HOMEPAGE = "https://github.com/ispc/ispc"
5
6LICENSE = "BSD-3-Clause & Apache-2.0-with-LLVM-exception"
7LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=da5ecffdd210b3cf776b32b41c182e87 \
8 file://third-party-programs.txt;md5=2061218c7be521556719c8b504bf9ddd"
9
10inherit cmake python3native ptest
11
12SRC_URI = "git://github.com/ispc/ispc.git;protocol=https;branch=main \
13 file://0002-cmake-don-t-build-for-32-bit-targets.patch \
14 file://0001-Fix-QA-Issues.patch \
15 file://0001-Add-print-function-to-print-test-run-status-in-ptest.patch \
16 file://run-ptest \
17 "
18
19SRCREV = "d394222aef59e4759b06e39ec160e4aba6ee5f40"
20
21COMPATIBLE_HOST = '(x86_64).*-linux'
22
23DEPENDS += " clang-native bison-native flex-native"
24DEPENDS:append:class-target = " clang"
25RDEPENDS:${PN}-ptest += " python3-multiprocessing"
26
27PACKAGECONFIG ??= "tbb"
28PACKAGECONFIG[tbb] = "-DISPCRT_BUILD_TASK_MODEL=TBB, -DISPCRT_BUILD_TASK_MODEL=OpenMP, tbb"
29
30YFLAGS = '-d -t -v -y --file-prefix-map=${WORKDIR}=/usr/src/debug/${PN}/${EXTENDPE}${PV}-${PR}'
31
32do_configure:prepend() {
33 sed -i -e 's#\${BISON_EXECUTABLE}.*#\${BISON_EXECUTABLE} ${YFLAGS} #g' ${S}/CMakeLists.txt
34 sed -i -e 's#\${FLEX_EXECUTABLE}.*#\${FLEX_EXECUTABLE} \-L #g' ${S}/CMakeLists.txt
35}
36
37do_install_ptest() {
38 cp -rf ${S}/run_tests.py ${D}${PTEST_PATH}
39 cp -rf ${S}/common.py ${D}${PTEST_PATH}
40 cp -rf ${S}/tests ${D}${PTEST_PATH}
41 cp -rf ${S}/test_static.isph ${D}${PTEST_PATH}
42 cp -rf ${S}/fail_db.txt ${D}${PTEST_PATH}
43 cp -rf ${S}/test_static.cpp ${D}${PTEST_PATH}
44}
45
46EXTRA_OECMAKE += " \
47 -DISPC_INCLUDE_TESTS=OFF \
48 -DISPC_INCLUDE_EXAMPLES=OFF \
49 -DARM_ENABLED=OFF \
50 -DISPC_CROSS=ON \
51 -DISPC_ANDROID_TARGET=OFF \
52 -DISPC_FREEBSD_TARGET=OFF \
53 -DISPC_WINDOWS_TARGET=OFF \
54 -DISPC_IOS_TARGET=OFF \
55 -DISPC_PS_TARGET=OFF \
56 -DSYSROOT_DIR=${STAGING_DIR} \
57 -DCLANG_EXECUTABLE=${STAGING_BINDIR_NATIVE}/clang \
58 -DCLANGPP_EXECUTABLE=${STAGING_BINDIR_NATIVE}/clang++ \
59 -DLLVM_AS_EXECUTABLE=${STAGING_BINDIR_NATIVE}/llvm-as \
60 "
61
62BBCLASSEXTEND = "native nativesdk"
diff --git a/dynamic-layers/openembedded-layer/recipes-core/levelzero/level-zero_1.21.1.bb b/dynamic-layers/openembedded-layer/recipes-core/levelzero/level-zero_1.21.1.bb
new file mode 100644
index 00000000..21b3956f
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-core/levelzero/level-zero_1.21.1.bb
@@ -0,0 +1,32 @@
1SUMMARY = "oneAPI Level Zero Specification Headers and Loader"
2HOMEPAGE = "https://github.com/oneapi-src/level-zero"
3LICENSE = "MIT"
4LIC_FILES_CHKSUM = "file://LICENSE;md5=97957beb2f7808ffa247e5d93e6442cc"
5
6SRC_URI = "git://github.com/oneapi-src/level-zero.git;protocol=https;branch=master"
7SRCREV = "9536683855b17a21508e5b54ba358225d6a976da"
8
9inherit cmake
10DEPENDS += "opencl-headers"
11
12UPSTREAM_CHECK_GITTAGREGEX = "^v(?P<pver>(\d+(\.\d+)+))$"
13
14PACKAGES =+ "${PN}-headers ${PN}-samples ${PN}-loader"
15
16do_install:append () {
17 install -d ${D}${bindir} ${D}${libdir}
18 install -m 755 ${B}/bin/zello* ${D}${bindir}
19
20 oe_libinstall -C lib libze_null ${D}${libdir}
21}
22
23
24FILES:${PN}-headers = "${includedir}"
25FILES:${PN}-samples = "${bindir} ${libdir}/libze_null* ${libdir}/libze_validation*"
26FILES:${PN}-loader = "${libdir}"
27
28# PN-loader (non -dev/-dbg/nativesdk- package) contains symlink .so
29INSANE_SKIP:${PN}-loader = "dev-so"
30INSANE_SKIP:${PN}-samples = "dev-so"
31ALLOW_EMPTY:${PN} = "1"
32BBCLASSEXTEND = "native nativesdk"
diff --git a/dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver/0001-Fix-the-compilation-warning-when-using-gcc-13-25.patch b/dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver/0001-Fix-the-compilation-warning-when-using-gcc-13-25.patch
new file mode 100644
index 00000000..2748d7ab
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver/0001-Fix-the-compilation-warning-when-using-gcc-13-25.patch
@@ -0,0 +1,99 @@
1From b57297c14d94dac9bdef7570b7b33d70b10171f3 Mon Sep 17 00:00:00 2001
2From: Jozef Wludzik <jozef.wludzik@intel.com>
3Date: Tue, 26 Mar 2024 14:43:29 +0100
4Subject: [PATCH 1/2] Fix the compilation warning when using gcc-13 (#25)
5
6Added missing headers. Fixed compilation error about casting from
7unsigned to signed int.
8
9Upstream-Status: Backport [https://github.com/intel/linux-npu-driver/commit/4bcbf2abe94eb4d9c083bd616b58e309a82d008a]
10
11Signed-off-by: Jozef Wludzik <jozef.wludzik@intel.com>
12Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
13---
14 umd/level_zero_driver/ext/source/graph/vcl_symbols.hpp | 7 ++++---
15 umd/vpu_driver/include/umd_common.hpp | 1 +
16 validation/umd-test/umd_prime_buffers.h | 9 +++++++--
17 validation/umd-test/utilities/data_handle.h | 1 +
18 4 files changed, 13 insertions(+), 5 deletions(-)
19
20diff --git a/umd/level_zero_driver/ext/source/graph/vcl_symbols.hpp b/umd/level_zero_driver/ext/source/graph/vcl_symbols.hpp
21index f206ebe..682e5b4 100644
22--- a/umd/level_zero_driver/ext/source/graph/vcl_symbols.hpp
23+++ b/umd/level_zero_driver/ext/source/graph/vcl_symbols.hpp
24@@ -5,12 +5,13 @@
25 *
26 */
27
28-#include <dlfcn.h>
29-#include <memory>
30-
31 #include "vpux_driver_compiler.h"
32 #include "vpu_driver/source/utilities/log.hpp"
33
34+#include <array>
35+#include <dlfcn.h>
36+#include <memory>
37+
38 class Vcl {
39 public:
40 static Vcl &sym() {
41diff --git a/umd/vpu_driver/include/umd_common.hpp b/umd/vpu_driver/include/umd_common.hpp
42index 0c874a3..5ad9be2 100644
43--- a/umd/vpu_driver/include/umd_common.hpp
44+++ b/umd/vpu_driver/include/umd_common.hpp
45@@ -7,6 +7,7 @@
46
47 #pragma once
48
49+#include <cstdint>
50 #include <limits>
51 #include <linux/kernel.h>
52 #include <stdexcept>
53diff --git a/validation/umd-test/umd_prime_buffers.h b/validation/umd-test/umd_prime_buffers.h
54index 6f7c7de..ab4814c 100644
55--- a/validation/umd-test/umd_prime_buffers.h
56+++ b/validation/umd-test/umd_prime_buffers.h
57@@ -6,12 +6,17 @@
58 */
59
60 #pragma once
61+
62+#include "umd_test.h"
63+
64 #include <fcntl.h>
65-#include <linux/kernel.h>
66 #include <linux/dma-buf.h>
67 #include <linux/dma-heap.h>
68+#include <linux/kernel.h>
69+#include <stdint.h>
70 #include <sys/ioctl.h>
71 #include <sys/mman.h>
72+#include <unistd.h>
73
74 #define ALLIGN_TO_PAGE(x) __ALIGN_KERNEL((x), (UmdTest::PAGE_SIZE))
75
76@@ -60,7 +65,7 @@ class PrimeBufferHelper {
77 return false;
78
79 bufferFd = heapAlloc.fd;
80- buffers.insert({heapAlloc.fd, {size, nullptr}});
81+ buffers.insert({static_cast<int>(heapAlloc.fd), {size, nullptr}});
82 return true;
83 }
84
85diff --git a/validation/umd-test/utilities/data_handle.h b/validation/umd-test/utilities/data_handle.h
86index d6e0ec0..5d937b2 100644
87--- a/validation/umd-test/utilities/data_handle.h
88+++ b/validation/umd-test/utilities/data_handle.h
89@@ -6,6 +6,7 @@
90 */
91
92 #include <linux/kernel.h>
93+#include <stdint.h>
94 #include <string>
95 #include <vector>
96
97--
982.43.0
99
diff --git a/dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver/0001-linux-npu-driver-fix-multilib-install-issue.patch b/dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver/0001-linux-npu-driver-fix-multilib-install-issue.patch
new file mode 100644
index 00000000..71a60b20
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver/0001-linux-npu-driver-fix-multilib-install-issue.patch
@@ -0,0 +1,28 @@
1From 561e3b5edc0ec3d8835aaf8ef8e5c9e8f9b53061 Mon Sep 17 00:00:00 2001
2From: Yogesh Tyagi <yogesh.tyagi@intel.com>
3Date: Wed, 28 May 2025 13:35:18 +0800
4Subject: [PATCH] linux-npu-driver : fix multilib install issue
5
6Upstream-Status: Inappropriate [oe specific]
7
8Signed-off-by: Yogesh Tyagi <yogesh.tyagi@intel.com>
9---
10 firmware/CMakeLists.txt | 2 +-
11 1 file changed, 1 insertion(+), 1 deletion(-)
12
13diff --git a/firmware/CMakeLists.txt b/firmware/CMakeLists.txt
14index 0c093ca..ba346a0 100644
15--- a/firmware/CMakeLists.txt
16+++ b/firmware/CMakeLists.txt
17@@ -12,7 +12,7 @@ target_include_directories(${PROJECT_NAME} INTERFACE include)
18 file(GLOB FIRMWARE_BINARIES ${CMAKE_CURRENT_SOURCE_DIR}/bin/*.bin)
19 if (FIRMWARE_BINARIES)
20 install(FILES ${FIRMWARE_BINARIES}
21- DESTINATION /lib/firmware/updates/intel/vpu/
22+ DESTINATION ${CMAKE_INSTALL_FIRMWARE_DIR}/firmware/updates/intel/vpu/
23 PERMISSIONS OWNER_READ
24 COMPONENT fw-npu)
25 endif()
26--
272.37.3
28
diff --git a/dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver/0002-Fix-compilation-failure-with-GCC-14.patch b/dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver/0002-Fix-compilation-failure-with-GCC-14.patch
new file mode 100644
index 00000000..9fb97354
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver/0002-Fix-compilation-failure-with-GCC-14.patch
@@ -0,0 +1,110 @@
1From a9f51fd88effb7d324609e692ca7da576d6dad2e Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Tue, 28 May 2024 10:23:42 +0800
4Subject: [PATCH 2/2] Fix compilation failure with GCC-14
5
6umd/level_zero_driver/core/source/event/event.cpp:65:31: error: 'remove_if' is not a member of 'std'; did you mean 'remove_cv'?
7| 65 | associatedJobs.erase(std::remove_if(associatedJobs.begin(),
8| | ^~~~~~~~~
9| | remove_cv
10
11| umd/vpu_driver/source/command/vpu_command.cpp: In member function 'void VPU::VPUCommand::appendAssociateBufferObject(VPU::VPUBufferObject*)':
12| umd/vpu_driver/source/command/vpu_command.cpp:126:20: error: 'find' is not a member of 'std'; did you mean 'bind'?
13| 126 | auto it = std::find(bufferObjects.begin(), bufferObjects.end(), bo);
14| | ^~~~
15| | bind
16
17| umd/vpu_driver/source/command/vpu_command_buffer.cpp: In member function 'bool VPU::VPUCommandBuffer::addCommand(VPU::VPUCommand*, uint64_t&, uint64_t&)':
18| umd/vpu_driver/source/command/vpu_command_buffer.cpp:185:24: error: 'find' is not a member of 'std'; did you mean 'bind'?
19| 185 | auto it = std::find(bufferHandles.begin(), bufferHandles.end(), bo->getHandle());
20| | ^~~~
21| | bind
22
23| umd/level_zero_driver/ext/source/graph/elf_parser.cpp:301:32: error: 'max_element' is not a member of 'std'; did you mean 'tuple_element'?
24| 301 | std::max_element(stride_begin + TENSOR_5D_STRIDE_C, stride_end));
25| | ^~~~~~~~~~~
26| | tuple_element
27| umd/level_zero_driver/ext/source/graph/elf_parser.cpp:315:37: error: 'max_element' is not a member of 'std'; did you mean 'tuple_element'?
28| 315 | auto max_stride_val = *std::max_element(stride_begin + TENSOR_4D_STRIDE_C, stride_end);
29| | ^~~~~~~~~~~
30
31| umd/level_zero_driver/tools/source/metrics/metric.cpp: In member function 'void L0::MetricContext::deactivateMetricGroups(int)':
32| umd/level_zero_driver/tools/source/metrics/metric.cpp:275:38: error: 'remove_if' is not a member of 'std'; did you mean 'remove_cv'?
33| 275 | activatedMetricGroups.erase(std::remove_if(activatedMetricGroups.begin(),
34| | ^~~~~~~~~
35| | remove_cv
36
37Upstream-Status: Submitted [https://github.com/intel/linux-npu-driver/pull/30]
38
39Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
40---
41 umd/level_zero_driver/core/source/event/event.cpp | 1 +
42 umd/level_zero_driver/ext/source/graph/elf_parser.cpp | 1 +
43 umd/level_zero_driver/tools/source/metrics/metric.cpp | 1 +
44 umd/vpu_driver/source/command/vpu_command.cpp | 1 +
45 umd/vpu_driver/source/command/vpu_command_buffer.cpp | 1 +
46 5 files changed, 5 insertions(+)
47
48diff --git a/umd/level_zero_driver/core/source/event/event.cpp b/umd/level_zero_driver/core/source/event/event.cpp
49index a92248f..196d176 100644
50--- a/umd/level_zero_driver/core/source/event/event.cpp
51+++ b/umd/level_zero_driver/core/source/event/event.cpp
52@@ -14,6 +14,7 @@
53
54 #include <level_zero/ze_api.h>
55 #include <thread>
56+#include <algorithm>
57
58 namespace L0 {
59
60diff --git a/umd/level_zero_driver/ext/source/graph/elf_parser.cpp b/umd/level_zero_driver/ext/source/graph/elf_parser.cpp
61index a1c8e14..dfbd61d 100644
62--- a/umd/level_zero_driver/ext/source/graph/elf_parser.cpp
63+++ b/umd/level_zero_driver/ext/source/graph/elf_parser.cpp
64@@ -21,6 +21,7 @@
65 #include <vpux_headers/metadata.hpp>
66 #include <vpux_elf/types/vpu_extensions.hpp>
67 #include <vpux_elf/utils/error.hpp>
68+#include <algorithm>
69
70 namespace L0 {
71
72diff --git a/umd/level_zero_driver/tools/source/metrics/metric.cpp b/umd/level_zero_driver/tools/source/metrics/metric.cpp
73index b67750f..9497311 100644
74--- a/umd/level_zero_driver/tools/source/metrics/metric.cpp
75+++ b/umd/level_zero_driver/tools/source/metrics/metric.cpp
76@@ -7,6 +7,7 @@
77
78 #include "level_zero_driver/tools/source/metrics/metric.hpp"
79 #include "vpu_driver/source/utilities/log.hpp"
80+#include <algorithm>
81
82 namespace L0 {
83
84diff --git a/umd/vpu_driver/source/command/vpu_command.cpp b/umd/vpu_driver/source/command/vpu_command.cpp
85index f4ca23f..75331d9 100644
86--- a/umd/vpu_driver/source/command/vpu_command.cpp
87+++ b/umd/vpu_driver/source/command/vpu_command.cpp
88@@ -14,6 +14,7 @@
89 #include <cstdint>
90 #include <vector>
91 #include <map>
92+#include <algorithm>
93
94 namespace VPU {
95
96diff --git a/umd/vpu_driver/source/command/vpu_command_buffer.cpp b/umd/vpu_driver/source/command/vpu_command_buffer.cpp
97index c4ad052..bbb80ec 100644
98--- a/umd/vpu_driver/source/command/vpu_command_buffer.cpp
99+++ b/umd/vpu_driver/source/command/vpu_command_buffer.cpp
100@@ -11,6 +11,7 @@
101 #include "vpu_driver/source/command/vpu_command_buffer.hpp"
102 #include "vpu_driver/source/command/vpu_copy_command.hpp"
103 #include "vpu_driver/source/utilities/log.hpp"
104+#include <algorithm>
105
106 namespace VPU {
107
108--
1092.43.0
110
diff --git a/dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver_1.17.0.bb b/dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver_1.17.0.bb
new file mode 100644
index 00000000..852bc0c1
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-core/linux-npu-driver/linux-npu-driver_1.17.0.bb
@@ -0,0 +1,43 @@
1SUMMARY = "User Mode Driver for IntelĀ® NPU device"
2HOMEPAGE = "https://github.com/intel/linux-npu-driver"
3LICENSE = "MIT & Apache-2.0"
4LIC_FILES_CHKSUM = "file://LICENSE.md;md5=7b256470048be42466f7d10e1d6482e6 \
5 file://third-party-programs.txt;md5=0ae40d7f1ef3bbd509197e427fdd7e70 \
6 file://third_party/vpux_elf/LICENSE;md5=a7a2dfa2b52a22cf2257893ee87ba11c"
7
8SRC_URI = "git://github.com/intel/linux-npu-driver.git;protocol=https;name=linux-npu-driver;branch=main;lfs=1 \
9 git://github.com/openvinotoolkit/npu_plugin_elf.git;protocol=https;destsuffix=${BB_GIT_DEFAULT_DESTSUFFIX}/third_party/vpux_elf;name=vpux-elf;nobranch=1 \
10 git://github.com/jbeder/yaml-cpp.git;protocol=https;destsuffix=${BB_GIT_DEFAULT_DESTSUFFIX}/third_party/yaml-cpp;name=yaml-cpp;nobranch=1 \
11 git://github.com/intel/level-zero-npu-extensions.git;protocol=https;destsuffix=${BB_GIT_DEFAULT_DESTSUFFIX}/third_party/level-zero-npu-extensions;name=lzvext;nobranch=1 \
12 git://github.com/google/googletest.git;protocol=https;destsuffix=${BB_GIT_DEFAULT_DESTSUFFIX}/third_party/googletest;name=googletest;nobranch=1 \
13 file://0001-linux-npu-driver-fix-multilib-install-issue.patch \
14 "
15
16SRCREV_linux-npu-driver = "0fe92dd0720448fb571f0ac4e5e64ef9f2ec3bd7"
17SRCREV_vpux-elf = "50f2b13dbb6dd435c3e2ef6f8abb7393633bfcdd"
18SRCREV_yaml-cpp = "f7320141120f720aecc4c32be25586e7da9eb978"
19SRCREV_lzvext = "c7d8f849d6a8195c1db38cbaca8d431cbabf3a6e"
20SRCREV_googletest = "b514bdc898e2951020cbdca1304b75f5950d1f59"
21SRCREV_FORMAT = "linux-npu-driver_vpux-elf_yaml-cpp_lzvext_googletest"
22
23inherit cmake
24
25
26# Fix warning _FORTIFY_SOURCE requires compiling with optimization (-O)
27EXTRA_OECMAKE += " -DCMAKE_BUILD_TYPE=Release "
28EXTRA_OECMAKE += " -DCMAKE_CXX_FLAGS_RELEASE=-O2 "
29
30EXTRA_OECMAKE += " -DCMAKE_CXX_FLAGS='-I${RECIPE_SYSROOT}/usr/include/level_zero'"
31EXTRA_OECMAKE += " -DCMAKE_INSTALL_FIRMWARE_DIR=${nonarch_base_libdir}"
32
33DEPENDS = "level-zero dpkg-native pkgconfig-native"
34
35PACKAGES =+ "${PN}-firmware ${PN}-tests"
36
37FILES:${PN}-firmware = "${nonarch_base_libdir}/firmware/updates/intel/vpu/*"
38FILES:${PN}-tests = "${bindir}"
39
40INSANE_SKIP:${PN} += "buildpaths"
41INSANE_SKIP:${PN}-dbg += "buildpaths"
42INSANE_SKIP:${PN}-tests += "buildpaths"
43INSANE_SKIP:${PN}-firmware += "buildpaths"
diff --git a/dynamic-layers/openembedded-layer/recipes-oneapi/dpcpp-compiler/intel-oneapi-dpcpp-cpp-runtime_2024.0.0-49819.bb b/dynamic-layers/openembedded-layer/recipes-oneapi/dpcpp-compiler/intel-oneapi-dpcpp-cpp-runtime_2024.0.0-49819.bb
new file mode 100644
index 00000000..106b3ad5
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-oneapi/dpcpp-compiler/intel-oneapi-dpcpp-cpp-runtime_2024.0.0-49819.bb
@@ -0,0 +1,54 @@
1SUMMARY = "IntelĀ® oneAPI DPC++/C++ Compiler runtime files"
2DESCRIPTION = "The IntelĀ® oneAPI DPC++/C++ Compiler provides optimizations \
3that help your applications run faster on IntelĀ® 64 architectures with support \
4for the latest C, C++, and SYCL language standards. This compiler produces \
5optimized code that can run significantly faster by taking advantage of the \
6ever-increasing core count and vector register width in IntelĀ® XeonĀ® processors \
7and compatible processors."
8
9HOMEPAGE = "https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html"
10
11LICENSE = "EULA"
12
13COMPILERMAINVER = "2024.0"
14
15LIC_FILES_CHKSUM = " \
16 file://opt/intel/oneapi/licensing/${COMPILERMAINVER}/licensing/${COMPILERMAINVER}/license.htm;md5=5ff64c6ff3ef98089ed69360e7a84c39 \
17 "
18COMPILERDOTVER = "2024.0.0-49406"
19DEVUTILITVERSION = "2024.0-2024.0.0-49320"
20SRC_URI = " \
21 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-compiler-dpcpp-cpp-runtime-${COMPILERMAINVER}-${PV}_amd64.deb;subdir=${BPN};name=dpcpp-runtime \
22 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-compiler-dpcpp-cpp-common-${COMPILERMAINVER}-${PV}_all.deb;subdir=${BPN};name=dpcpp-common \
23 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-compiler-shared-runtime-${COMPILERMAINVER}-${PV}_amd64.deb;subdir=${BPN};name=compiler-shared-runtime \
24 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-common-vars-${COMPILERDOTVER}_all.deb;subdir=${BPN};name=common-vars \
25 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-openmp-${COMPILERMAINVER}-${PV}_amd64.deb;subdir=${BPN};name=openmp \
26 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-openmp-common-${COMPILERMAINVER}-${PV}_all.deb;subdir=${BPN};name=openmp-common \
27 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-common-licensing-${COMPILERMAINVER}-${COMPILERDOTVER}_all.deb;subdir=${BPN};name=license \
28 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-dev-utilities-${DEVUTILITVERSION}_amd64.deb;subdir=${BPN};name=dev-utils \
29 "
30
31SRC_URI[dpcpp-runtime.sha256sum] = "e24f0ba69daf3f66ceaf23d5e632f183cdb90bac69f65407fdb4407fc9034f33"
32SRC_URI[dpcpp-common.sha256sum] = "f5a3db6a725598224edf1099260955aee3e36beadcaed2af5b8b453e873a82fa"
33SRC_URI[compiler-shared-runtime.sha256sum] = "bce010cbe076259ddd3feb8e69792869e22fccd5b4e2c9af9e815826f2c1a394"
34SRC_URI[common-vars.sha256sum] = "368553c99db1b52060b8225355336778be0b00e5991d0f769c42f891c6328750"
35SRC_URI[openmp.sha256sum] = "154ff1e81adfdc872ba1d47bd860de70d62188417c7128422435dfd0ceca62fe"
36SRC_URI[openmp-common.sha256sum] = "8217001d78311cbef97dd139e684c6767932b532309c3843ba57d7894d15c07d"
37SRC_URI[license.sha256sum] = "9f9c8a12fc0bc82ab5b71e118e66745eff23f42224eba304068225b366cd74b6"
38SRC_URI[dev-utils.sha256sum] = "c675d960a5abca361cead9217d6e74adee499ee0a095c4e44092bd710b304d50"
39
40S = "${UNPACKDIR}/${BPN}"
41
42inherit bin_package
43
44RDEPENDS:${PN} += "virtual-opencl-icd zlib tbb level-zero-loader bash tcsh"
45SKIP_FILEDEPS:${PN} = '1'
46
47INHIBIT_PACKAGE_STRIP = "1"
48INHIBIT_PACKAGE_DEBUG_SPLIT = "1"
49
50# doesn't have GNU_HASH (didn't pass LDFLAGS?)
51INSANE_SKIP:${PN} += "textrel dev-so dev-elf ldflags already-stripped staticdev rpaths arch useless-rpaths file-rdeps"
52
53FILES_SOLIBSDEV = ""
54BBCLASSEXTEND = "native nativesdk"
diff --git a/dynamic-layers/openembedded-layer/recipes-oneapi/dpcpp-compiler/intel-oneapi-dpcpp-cpp_2024.0.0-49819.bb b/dynamic-layers/openembedded-layer/recipes-oneapi/dpcpp-compiler/intel-oneapi-dpcpp-cpp_2024.0.0-49819.bb
new file mode 100644
index 00000000..d2bd72a3
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-oneapi/dpcpp-compiler/intel-oneapi-dpcpp-cpp_2024.0.0-49819.bb
@@ -0,0 +1,45 @@
1SUMMARY = "IntelĀ® oneAPI DPC++/C++ Compiler"
2DESCRIPTION = "The IntelĀ® oneAPI DPC++/C++ Compiler provides optimizations \
3that help your applications run faster on IntelĀ® 64 architectures with support \
4for the latest C, C++, and SYCL language standards. This compiler produces \
5optimized code that can run significantly faster by taking advantage of the \
6ever-increasing core count and vector register width in IntelĀ® XeonĀ® processors \
7and compatible processors."
8
9HOMEPAGE = "https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html"
10
11LICENSE = "EULA"
12
13COMPILERMAINVER = "2024.0"
14
15LIC_FILES_CHKSUM = " \
16 file://opt/intel/oneapi/compiler/${COMPILERMAINVER}/share/doc/compiler/credist.txt;md5=b41f55af9f479b9570fc35b955d5ba1a \
17 "
18
19SRC_URI = " \
20 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-dpcpp-cpp-${COMPILERMAINVER}-${PV}_amd64.deb;subdir=${BPN};name=icx-compiler \
21 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-compiler-shared-${COMPILERMAINVER}-${PV}_amd64.deb;subdir=${BPN};name=compiler-linker \
22 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-compiler-shared-common-${COMPILERMAINVER}-${PV}_all.deb;subdir=${BPN};name=shared-common \
23 "
24
25SRC_URI[icx-compiler.sha256sum] = "0dcbac766d5a1519d4cf393f5a85e71d19024fef65f77638f3f849796b62cd82"
26SRC_URI[compiler-linker.sha256sum] = "e00faea6d797934d62143e4aa70b727ce30f80fdf30769d22122b3051140c236"
27SRC_URI[shared-common.sha256sum] = "cf490a4a790f349da79e618359598d3b32312ca3b2639e5d4c84e1cfa2745558"
28
29S = "${UNPACKDIR}/${BPN}"
30
31inherit bin_package
32
33RDEPENDS:${PN} += "intel-oneapi-dpcpp-cpp-runtime"
34SKIP_FILEDEPS:${PN} = '1'
35
36INHIBIT_PACKAGE_STRIP = "1"
37INHIBIT_PACKAGE_DEBUG_SPLIT = "1"
38
39# doesn't have GNU_HASH (didn't pass LDFLAGS?)
40INSANE_SKIP:${PN} += "textrel dev-so dev-elf ldflags already-stripped file-rdeps staticdev rpaths arch useless-rpaths"
41
42FILES_SOLIBSDEV = ""
43
44EXCLUDE_FROM_SHLIBS = "1"
45BBCLASSEXTEND = "native nativesdk"
diff --git a/dynamic-layers/openembedded-layer/recipes-oneapi/embree/embree_4.3.3.bb b/dynamic-layers/openembedded-layer/recipes-oneapi/embree/embree_4.3.3.bb
new file mode 100644
index 00000000..77549479
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-oneapi/embree/embree_4.3.3.bb
@@ -0,0 +1,30 @@
1SUMMARY = "Collection of high-performance ray tracing kernels"
2DESCRIPTION = "A collection of high-performance ray tracing kernels \
3intended to graphics application engineers that want to improve the \
4performance of their application."
5HOMEPAGE = "https://github.com/embree/embree"
6
7LICENSE = "Apache-2.0 & BSD-3-Clause"
8LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=3b83ef96387f14655fc854ddc3c6bd57 \
9 file://third-party-programs.txt;md5=f989f5b74cfff0d45d3ccf0e1366cbdc \
10 file://common/math/transcendental.h;beginline=6;endline=8;md5=73380bb2ab6613b30b8464f114bd0ca8"
11
12inherit pkgconfig cmake
13
14SRC_URI = "git://github.com/embree/embree.git;protocol=https;branch=master"
15SRCREV = "5730b150471602d6dc02d9b7d8a4a6ce9ceffe16"
16
17COMPATIBLE_HOST = '(x86_64).*-linux'
18COMPATIBLE_HOST:libc-musl = "null"
19
20DEPENDS = "tbb jpeg libpng glfw ispc-native"
21
22EXTRA_OECMAKE += " \
23 -DEMBREE_IGNORE_CMAKE_CXX_FLAGS=OFF \
24 -DEMBREE_MAX_ISA=DEFAULT \
25 -DEMBREE_TUTORIALS=OFF \
26 -DEMBREE_ISPC_SUPPORT=ON \
27 -DEMBREE_ZIP_MODE=OFF \
28 "
29
30UPSTREAM_CHECK_GITTAGREGEX = "^v(?P<pver>(\d+(\.\d+)+))$"
diff --git a/dynamic-layers/openembedded-layer/recipes-oneapi/ipp/intel-oneapi-ipp_2021.10.0-653.bb b/dynamic-layers/openembedded-layer/recipes-oneapi/ipp/intel-oneapi-ipp_2021.10.0-653.bb
new file mode 100644
index 00000000..d10f50f9
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-oneapi/ipp/intel-oneapi-ipp_2021.10.0-653.bb
@@ -0,0 +1,35 @@
1DESCRIPTION = "IntelĀ® Integrated Performance Primitives are production-ready \
2 building blocks for cross-platform performance. Develop high-performance vision, \
3 signal, security, and storage applications with this multithreaded software library."
4HOMEPAGE = "https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/ipp.html"
5
6LICENSE = "ISSL"
7
8MAXVER = "2021.10"
9
10LIC_FILES_CHKSUM = " \
11 file://opt/intel/oneapi/ipp/${MAXVER}/share/doc/ipp/licensing/license.txt;md5=d7cdc92ed6c4de1263da879599ddc3e2 \
12 file://opt/intel/oneapi/ipp/${MAXVER}/share/doc/ipp/licensing/third-party-programs.txt;md5=22bd13987911dcf790907041b43081f3 \
13 "
14SRC_URI = " \
15 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-ipp-${MAXVER}-${PV}_amd64.deb;subdir=${BPN};name=ipp \
16 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-ipp-common-devel-${MAXVER}-${PV}_all.deb;subdir=${BPN};name=headers \
17 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-ipp-common-${MAXVER}-${PV}_all.deb;subdir=${BPN};name=env \
18 "
19
20SRC_URI[ipp.sha256sum] = "b51e45c6e691aa46c7136b0ab61f5abe346388433e017a30cf53fd23e92bea07"
21SRC_URI[headers.sha256sum] = "342f37ab2f82bc9f4498435f848ee660591c2488b44d988bf6ee96b2a71fd005"
22SRC_URI[env.sha256sum] = "731e8c28a3b8b757730cd874d0941de2eb744856128f24ade59d36c12b415bf6"
23
24S = "${UNPACKDIR}/${BPN}"
25
26COMPATIBLE_HOST:libc-musl = "null"
27
28inherit bin_package
29
30INHIBIT_PACKAGE_STRIP = "1"
31INHIBIT_PACKAGE_DEBUG_SPLIT = "1"
32INHIBIT_DEFAULT_DEPS = ""
33
34RDEPENDS:${PN} += "tbb setup-intel-oneapi-env"
35INSANE_SKIP:${PN} += "ldflags dev-so"
diff --git a/dynamic-layers/openembedded-layer/recipes-oneapi/mkl/intel-oneapi-mkl_2024.0.0-49656.bb b/dynamic-layers/openembedded-layer/recipes-oneapi/mkl/intel-oneapi-mkl_2024.0.0-49656.bb
new file mode 100644
index 00000000..e77ac588
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-oneapi/mkl/intel-oneapi-mkl_2024.0.0-49656.bb
@@ -0,0 +1,74 @@
1SUMMARY = "IntelĀ® oneAPI Math Kernel Library (oneMKL)"
2DESCRIPTION = "The IntelĀ® oneAPI Math Kernel Library (oneMKL) is a computing \
3 math library of highly optimized and extensively parallelized routines \
4 for applications that require maximum performance. oneMKL contains \
5 the high-performance optimizations from the full IntelĀ® Math Kernel Library \
6 for CPU architectures (with C/Fortran programming language interfaces)\
7 and adds to them a set of DPC++ programming language interfaces for \
8 achieving performance on various CPU architectures \
9 and Intel Graphics Technology for certain key functionalities."
10HOMEPAGE = "https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl.html"
11
12LICENSE = "ISSL"
13
14MKLMAINVER = "2024.0"
15
16LIC_FILES_CHKSUM = " \
17 file://opt/intel/oneapi/mkl/${MKLMAINVER}/share/doc/mkl/licensing/license.txt;md5=8510d21bf355a76e378c3216c3929ccd \
18 file://opt/intel/oneapi/mkl/${MKLMAINVER}/share/doc/mkl/licensing/third-party-programs-benchmarks.txt;md5=cb98e1a1f14c05ea85a979ea8982e7a4 \
19 file://opt/intel/oneapi/mkl/${MKLMAINVER}/share/doc/mkl/licensing/third-party-programs-ipp.txt;md5=a4b2bf15e38f5c1267cdafed18bc0b09 \
20 file://opt/intel/oneapi/mkl/${MKLMAINVER}/share/doc/mkl/licensing/third-party-programs-openmp.txt;md5=6b3c1aa2a11393060074c0346ce21e49 \
21 file://opt/intel/oneapi/mkl/${MKLMAINVER}/share/doc/mkl/licensing/third-party-programs-safestring.txt;md5=c3aeee91c6d35a0f0753aed6c2633b82 \
22 file://opt/intel/oneapi/mkl/${MKLMAINVER}/share/doc/mkl/licensing/third-party-programs.txt;md5=27de873e4084d62530fe828406b33ca9 \
23 "
24
25SRC_URI = " \
26 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-mkl-${MKLMAINVER}-${PV}_amd64.deb;subdir=${BPN};name=runtime \
27 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-mkl-common-devel-${MKLMAINVER}-${PV}_all.deb;subdir=${BPN};name=common-devel \
28 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-mkl-devel-${MKLMAINVER}-${PV}_amd64.deb;subdir=${BPN};name=devel \
29 https://apt.repos.intel.com/oneapi/pool/main/intel-oneapi-mkl-common-${MKLMAINVER}-${PV}_all.deb;subdir=${BPN};name=common-vars \
30 "
31
32SRC_URI[runtime.sha256sum] = "10a86e24051d6ef4a80fd839c570e629190638a3c0ac9bcca99ab855f534b959"
33SRC_URI[common-devel.sha256sum] = "adbf0ea946f63946d29b7f9c750c38a42ea7a65d8c81655b268aa2c7bb908192"
34SRC_URI[devel.sha256sum] = "fab2a6f15e18bfd9b4d425f2703e4e98928c57f52c4feebc9ed886f097062e84"
35SRC_URI[common-vars.sha256sum] = "ec2b67813739fa4a2895f63479a41acba2174afe2d0cb8a0c1c9119d1317d8ef"
36
37S = "${UNPACKDIR}/${BPN}"
38
39inherit bin_package
40
41do_install:append () {
42 install -d ${D}${bindir}
43 (cd ${D}${bindir} ; ln -s ../../opt/intel/oneapi/mkl/${MKLMAINVER}/bin/* .)
44 install -d ${D}${libdir}
45 (cd ${D}${libdir} ; ln -s ../../opt/intel/oneapi/mkl/${MKLMAINVER}/lib/intel64/*.so* .)
46 (cd ${D}${libdir} ; ln -s ../../opt/intel/oneapi/mkl/${MKLMAINVER}/lib/intel64/*.a* .)
47 install -d ${D}${libdir}/pkgconfig
48 (cd ${D}${libdir}/pkgconfig ; ln -s ../../../opt/intel/oneapi/mkl/${MKLMAINVER}/lib/pkgconfig/* .)
49 install -d ${D}${libdir}/cmake
50 (cd ${D}${libdir}/cmake ; ln -s ../../../opt/intel/oneapi/mkl/${MKLMAINVER}/lib/cmake/* .)
51
52 install -d ${D}${includedir}
53 find ${D}/opt/intel/oneapi/mkl/${MKLMAINVER}/include/ -mindepth 1 -maxdepth 1 -type d -printf '%f\n' | while read srcdir; do
54 install -d ${D}${includedir}/$srcdir
55 (cd ${D}${includedir} ; ln -s ../../opt/intel/oneapi/mkl/${MKLMAINVER}/include/$srcdir/* ./$srcdir/)
56 done
57
58 find ${D}/opt/intel/oneapi/mkl/${MKLMAINVER}/include/ -mindepth 1 -maxdepth 1 -type f -printf '%f\n' | while read srcfile; do
59 (cd ${D}${includedir} ; ln -s ../../opt/intel/oneapi/mkl/${MKLMAINVER}/include/$srcfile .)
60 done
61}
62
63AUTO_LIBNAME_PKGS = ""
64INHIBIT_PACKAGE_STRIP = "1"
65INHIBIT_PACKAGE_DEBUG_SPLIT = "1"
66
67RDEPENDS:${PN} += "bash tbb intel-oneapi-dpcpp-cpp-runtime setup-intel-oneapi-env virtual-opencl-icd"
68INSANE_SKIP:${PN} = "ldflags textrel dev-so staticdev arch already-stripped"
69
70FILES:${PN}-staticdev += "/opt/intel/oneapi/mkl/${MKLMAINVER}/lib/*.a*"
71
72SKIP_FILEDEPS:${PN} = '1'
73
74SYSROOT_DIRS += "/opt"
diff --git a/dynamic-layers/openembedded-layer/recipes-oneapi/oidn/oidn_2.1.0.bb b/dynamic-layers/openembedded-layer/recipes-oneapi/oidn/oidn_2.1.0.bb
new file mode 100644
index 00000000..8005f544
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-oneapi/oidn/oidn_2.1.0.bb
@@ -0,0 +1,27 @@
1SUMMARY = "IntelĀ® Open Image Denoise"
2DESCRIPTION = "Intel Open Image Denoise is an open source library of \
3high-performance, high-quality denoising filters for images \
4rendered with ray tracing. Intel Open Image Denoise is part \
5of the IntelĀ® oneAPI Rendering Toolkit"
6HOMEPAGE = "https://www.openimagedenoise.org/"
7
8LICENSE = "Apache-2.0 & BSD-3-Clause"
9LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=3b83ef96387f14655fc854ddc3c6bd57 \
10 file://external/mkl-dnn/LICENSE;md5=b48e3de3bfd47c27882a0d85b20823f5 \
11 file://external/mkl-dnn/src/cpu/x64/xbyak/COPYRIGHT;md5=3b9bf048d063d54cdb28964db558bcc7 \
12 file://external/mkl-dnn/src/common/ittnotify/LICENSE.BSD;md5=e671ff178b24a95a382ba670503c66fb \
13 file://weights/LICENSE.txt;md5=3b83ef96387f14655fc854ddc3c6bd57"
14
15SRC_URI = "https://github.com/OpenImageDenoise/${BPN}/releases/download/v${PV}/${BP}.src.tar.gz\
16 "
17SRC_URI[sha256sum] = "ce144ba582ff36563d9442ee07fa2a4d249bc85aa93e5b25fc527ff4ee755ed6"
18
19inherit cmake
20
21DEPENDS += "tbb ispc-native"
22
23do_install:append() {
24 chrpath -d ${D}${bindir}/* ${D}${libdir}/*${SOLIBS}
25}
26
27UPSTREAM_CHECK_URI = "https://github.com/OpenImageDenoise/oidn/releases"
diff --git a/dynamic-layers/openembedded-layer/recipes-oneapi/openvkl/openvkl_2.0.1.bb b/dynamic-layers/openembedded-layer/recipes-oneapi/openvkl/openvkl_2.0.1.bb
new file mode 100644
index 00000000..1b0b32ed
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-oneapi/openvkl/openvkl_2.0.1.bb
@@ -0,0 +1,40 @@
1SUMMARY = "Intel(R) Open Volume Kernel Library"
2DESCRIPTION = "Intel(R) Open Volume Kernel Library (Intel(R) Open VKL) is a \
3collection of high-performance volume computation kernels. The target users \
4of Open VKL are graphics application engineers who want to improve the \
5performance of their volume rendering applications by leveraging Open VKL’s \
6performance-optimized kernels, which include volume traversal and sampling \
7functionality for a variety of volumetric data formats. The kernels are optimized \
8for the latest Intel(R) processors with support for SSE, AVX, AVX2, and AVX-512 \
9instructions."
10HOMEPAGE = "https://www.openvkl.org/"
11
12LICENSE = "Apache-2.0 & BSD-3-Clause & MIT & Zlib"
13LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=3b83ef96387f14655fc854ddc3c6bd57 \
14 file://third-party-programs.txt;md5=69ec7caf49616c471161b921f53d5ec0 \
15 file://testing/external/half.hpp;beginline=1;endline=17;md5=4b60058493630c3bd0ef145470f04a7b"
16
17inherit pkgconfig cmake
18
19SRC_URI = "git://github.com/openvkl/openvkl.git;protocol=https;branch=master \
20 "
21SRCREV = "8c6ba526813b871a624cb9d73d4cbb689ac7f4ce"
22
23COMPATIBLE_HOST = '(x86_64).*-linux'
24COMPATIBLE_HOST:libc-musl = "null"
25
26DEPENDS = "ispc ispc-native rkcommon embree"
27
28EXTRA_OECMAKE += " \
29 -DISPC_EXECUTABLE=${STAGING_BINDIR_NATIVE}/ispc \
30 "
31EXTRA_OECMAKE:intel-corei7-64 += " \
32 -DOPENVKL_ISA_AVX=OFF \
33 -DOPENVKL_ISA_AVX2=OFF \
34 -DOPENVKL_ISA_AVX512SKX=OFF \
35 "
36
37PACKAGES =+ "${PN}-examples"
38FILES:${PN}-examples = "\
39 ${bindir} \
40 "
diff --git a/dynamic-layers/openembedded-layer/recipes-oneapi/ospray/ospray/0001-Fix-GCC11-Compile-Error-in-benchmark_register.h.patch b/dynamic-layers/openembedded-layer/recipes-oneapi/ospray/ospray/0001-Fix-GCC11-Compile-Error-in-benchmark_register.h.patch
new file mode 100644
index 00000000..c2136321
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-oneapi/ospray/ospray/0001-Fix-GCC11-Compile-Error-in-benchmark_register.h.patch
@@ -0,0 +1,28 @@
1From 09e6b6615d9a16e9555eff9d569356e32bd26aa4 Mon Sep 17 00:00:00 2001
2From: benradel <86351445+benradel@users.noreply.github.com>
3Date: Wed, 23 Jun 2021 11:56:03 +0200
4Subject: [PATCH] Fix GCC11 Compile Error in benchmark_register.h
5
6Fix GCC11 compilation error due to missing header "limits" in benchmark_register.h
7
8Upstream-Status: Submitted [https://github.com/ospray/ospray/pull/484/files]
9Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
10---
11 apps/common/external/benchmark/src/benchmark_register.h | 1 +
12 1 file changed, 1 insertion(+)
13
14diff --git a/apps/common/external/benchmark/src/benchmark_register.h b/apps/common/external/benchmark/src/benchmark_register.h
15index 61377d742..8f1bb7795 100644
16--- a/apps/common/external/benchmark/src/benchmark_register.h
17+++ b/apps/common/external/benchmark/src/benchmark_register.h
18@@ -2,6 +2,7 @@
19 #define BENCHMARK_REGISTER_H
20
21 #include <vector>
22+#include <limits>
23
24 #include "check.h"
25
26--
272.32.0
28
diff --git a/dynamic-layers/openembedded-layer/recipes-oneapi/ospray/ospray_3.2.0.bb b/dynamic-layers/openembedded-layer/recipes-oneapi/ospray/ospray_3.2.0.bb
new file mode 100644
index 00000000..d4e85cbc
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-oneapi/ospray/ospray_3.2.0.bb
@@ -0,0 +1,36 @@
1SUMMARY = "Intel OSPray, Ray Tracing based Rendering Engine"
2DESCRIPTION = "Intel OSPRay is an open source, scalable, and portable ray \
3tracing engine for high-performance, high-fidelity visualization on \
4Intel Architecture CPUs."
5HOMEPAGE = "https://www.ospray.org/"
6
7LICENSE = "Apache-2.0 & BSD-3-Clause & MIT & Zlib"
8LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=1dece7821bf3fd70fe1309eaa37d52a2 \
9 file://third-party-programs.txt;md5=e37b77e3bd997abccc359c710fb1f1db \
10 "
11
12inherit pkgconfig cmake
13
14SRC_URI = "git://github.com/ospray/ospray.git;protocol=https;branch=master \
15 git://github.com/nothings/stb;protocol=https;destsuffix=${BB_GIT_DEFAULT_DESTSUFFIX}/apps/common/external/stb_image/stb;name=stb;nobranch=1 \
16 "
17SRCREV = "85af2929937d516997451cbd52d352cf93125ed2"
18
19COMPATIBLE_HOST = '(x86_64).*-linux'
20COMPATIBLE_HOST:libc-musl = "null"
21
22DEPENDS = "rkcommon ispc ispc-native openvkl embree"
23
24EXTRA_OECMAKE += " \
25 -DISPC_EXECUTABLE=${STAGING_BINDIR_NATIVE}/ispc \
26 -DOSPRAY_ENABLE_APPS_BENCHMARK=OFF \
27 -DOSPRAY_ENABLE_APPS_TESTING=OFF \
28 -DOSPRAY_ENABLE_APPS_EXAMPLES=OFF \
29 "
30
31PACKAGES =+ "${PN}-apps"
32FILES:${PN}-apps = "\
33 ${bindir} \
34 "
35
36UPSTREAM_CHECK_GITTAGREGEX = "^v(?P<pver>(\d+(\.\d+)+))$"
diff --git a/dynamic-layers/openembedded-layer/recipes-oneapi/rkcommon/rkcommon_1.14.0.bb b/dynamic-layers/openembedded-layer/recipes-oneapi/rkcommon/rkcommon_1.14.0.bb
new file mode 100644
index 00000000..5289e9eb
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-oneapi/rkcommon/rkcommon_1.14.0.bb
@@ -0,0 +1,17 @@
1SUMMARY = "rkcommon - C++/CMake infrastructure"
2DESCRIPTION = "A common set of C++ infrastructure and CMake utilities \
3used by various components of IntelĀ® oneAPI Rendering Toolkit."
4HOMEPAGE = "https://github.com/ospray/rkcommon"
5
6LICENSE = "Apache-2.0"
7LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=3b83ef96387f14655fc854ddc3c6bd57"
8
9inherit pkgconfig cmake
10
11SRC_URI = "git://github.com/ospray/rkcommon.git;protocol=https;branch=master \
12 "
13SRCREV = "4a00047ae5a3ac705b6b33b4a7574588d91e7953"
14
15DEPENDS = "tbb"
16
17EXTRA_OECMAKE += " -DBUILD_TESTING=OFF"
diff --git a/dynamic-layers/openembedded-layer/recipes-support/ipmctl/ipmctl/0001-CMakeLists-disable-Werror.patch b/dynamic-layers/openembedded-layer/recipes-support/ipmctl/ipmctl/0001-CMakeLists-disable-Werror.patch
new file mode 100644
index 00000000..4474aa3c
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-support/ipmctl/ipmctl/0001-CMakeLists-disable-Werror.patch
@@ -0,0 +1,38 @@
1From ef56be8e6bf2ea273cbeb960f3131164e7b517b6 Mon Sep 17 00:00:00 2001
2From: Anuj Mittal <anuj.mittal@intel.com>
3Date: Wed, 18 May 2022 12:24:03 +0800
4Subject: [PATCH] CMakeLists: disable Werror
5
6gcc12 highlights minor warnings that cause the build to fail. Ignore
7those until they aren't fixed upstream.
8
9Upstream-Status: Inappropriate
10
11Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
12---
13 CMakeLists.txt | 4 ++--
14 1 file changed, 2 insertions(+), 2 deletions(-)
15
16diff --git a/CMakeLists.txt b/CMakeLists.txt
17index 418483f7..c019fea0 100644
18--- a/CMakeLists.txt
19+++ b/CMakeLists.txt
20@@ -146,7 +146,7 @@ else()
21 set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2 -fno-strict-aliasing -D_FORTIFY_SOURCE=2")
22 if(LNX_BUILD)
23 #A few warnings yet to resolve under esx
24- set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Werror")
25+ #set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Werror")
26 set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Werror")
27 endif()
28 set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -ggdb")
29@@ -791,4 +791,4 @@ if(ESX_BUILD)
30 include(CMake/esx.cmake)
31 endif()
32
33-add_subdirectory(src/os/nvm_api_sample)
34\ No newline at end of file
35+add_subdirectory(src/os/nvm_api_sample)
36--
372.35.3
38
diff --git a/dynamic-layers/openembedded-layer/recipes-support/ipmctl/ipmctl/0001-Ignore-STATIC_ASSERTs-and-NULL-define-for-os-and-ut-builds.patch b/dynamic-layers/openembedded-layer/recipes-support/ipmctl/ipmctl/0001-Ignore-STATIC_ASSERTs-and-NULL-define-for-os-and-ut-builds.patch
new file mode 100644
index 00000000..8a734ed2
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-support/ipmctl/ipmctl/0001-Ignore-STATIC_ASSERTs-and-NULL-define-for-os-and-ut-builds.patch
@@ -0,0 +1,62 @@
1Subject: [PATCH] [PATCH]: ignore static asserts and null define for os and ut
2 builds
3Upstream-Status: Backport
4Signed-off-by: Teoh Suh Haw <suh.haw.teoh@intel.com>
5---
6 MdePkg/Include/Base.h | 12 ++++++++++++
7 1 file changed, 12 insertions(+)
8
9Index: edk2/MdePkg/Include/Base.h
10===================================================================
11--- edk2.orig/MdePkg/Include/Base.h
12+++ edk2/MdePkg/Include/Base.h
13@@ -316,8 +316,12 @@ struct _LIST_ENTRY {
14 #define NULL __null
15 #endif
16 #else
17+#ifndef OS_BUILD
18+#ifndef UNIT_TEST_UEFI_BUILD
19 #define NULL ((VOID *) 0)
20 #endif
21+#endif
22+#endif
23
24 //
25 // Null character
26@@ -813,6 +817,8 @@ typedef UINTN *BASE_LIST;
27 // Section 2.3.1 of the UEFI 2.3 Specification.
28 //
29
30+#ifndef OS_BUILD
31+#ifndef UNIT_TEST_UEFI_BUILD
32 STATIC_ASSERT (sizeof (BOOLEAN) == 1, "sizeof (BOOLEAN) does not meet UEFI Specification Data Type requirements");
33 STATIC_ASSERT (sizeof (INT8) == 1, "sizeof (INT8) does not meet UEFI Specification Data Type requirements");
34 STATIC_ASSERT (sizeof (UINT8) == 1, "sizeof (UINT8) does not meet UEFI Specification Data Type requirements");
35@@ -841,7 +847,8 @@ STATIC_ASSERT (ALIGNOF (CHAR16) == size
36 STATIC_ASSERT (ALIGNOF (INTN) == sizeof (INTN), "Alignment of INTN does not meet UEFI Specification Data Type requirements");
37 STATIC_ASSERT (ALIGNOF (UINTN) == sizeof (UINTN), "Alignment of UINTN does not meet UEFI Specification Data Type requirements");
38 STATIC_ASSERT (ALIGNOF (VOID *) == sizeof (VOID *), "Alignment of VOID * does not meet UEFI Specification Data Type requirements");
39-
40+#endif
41+#endif
42 //
43 // The following three enum types are used to verify that the compiler
44 // configuration for enum types is compliant with Section 2.3.1 of the
45@@ -861,6 +868,8 @@ typedef enum {
46 __VerifyInt32EnumValue = 0x7fffffff
47 } __VERIFY_INT32_ENUM_SIZE;
48
49+#ifndef OS_BUILD
50+#ifndef UNIT_TEST_UEFI_BUILD
51 STATIC_ASSERT (sizeof (__VERIFY_UINT8_ENUM_SIZE) == 4, "Size of enum does not meet UEFI Specification Data Type requirements");
52 STATIC_ASSERT (sizeof (__VERIFY_UINT16_ENUM_SIZE) == 4, "Size of enum does not meet UEFI Specification Data Type requirements");
53 STATIC_ASSERT (sizeof (__VERIFY_INT32_ENUM_SIZE) == 4, "Size of enum does not meet UEFI Specification Data Type requirements");
54@@ -868,6 +877,8 @@ STATIC_ASSERT (sizeof (__VERIFY_INT32_EN
55 STATIC_ASSERT (ALIGNOF (__VERIFY_UINT8_ENUM_SIZE) == sizeof (__VERIFY_UINT8_ENUM_SIZE), "Alignment of enum does not meet UEFI Specification Data Type requirements");
56 STATIC_ASSERT (ALIGNOF (__VERIFY_UINT16_ENUM_SIZE) == sizeof (__VERIFY_UINT16_ENUM_SIZE), "Alignment of enum does not meet UEFI Specification Data Type requirements");
57 STATIC_ASSERT (ALIGNOF (__VERIFY_INT32_ENUM_SIZE) == sizeof (__VERIFY_INT32_ENUM_SIZE), "Alignment of enum does not meet UEFI Specification Data Type requirements");
58+#endif
59+#endif
60
61 /**
62 Macro that returns a pointer to the data structure that contains a specified field of
diff --git a/dynamic-layers/openembedded-layer/recipes-support/ipmctl/ipmctl_03.00.00.0499.bb b/dynamic-layers/openembedded-layer/recipes-support/ipmctl/ipmctl_03.00.00.0499.bb
new file mode 100644
index 00000000..7b95ffe4
--- /dev/null
+++ b/dynamic-layers/openembedded-layer/recipes-support/ipmctl/ipmctl_03.00.00.0499.bb
@@ -0,0 +1,45 @@
1SUMMARY = "Utility for managing Intel Optane DC persistent memory modules"
2DESCRIPTION = "Utility for configuring and managing Intel Optane Persistent \
3Memory modules (PMem). It supports functionality to: \
4Discover DCPMMs on the platform. \
5Provision the platform memory configuration. \
6View and update the firmware on DCPMMs. \
7Configure data-at-rest security on DCPMMs. \
8Track health and performance of DCPMMs. \
9Debug and troubleshoot DCPMMs."
10
11HOMEPAGE = "https://github.com/intel/ipmctl"
12BUGTRACKER = "https://github.com/intel/ipmctl/issues"
13
14LICENSE = "BSD-3-Clause | BSD-2-Clause"
15LIC_FILES_CHKSUM = "file://LICENSE;md5=72b9da60da6219d612ce30b746a0fe71 \
16 file://edk2/License.txt;md5=6123e5bf044a66db96c4ce88a36b2d08"
17
18SRC_URI = "git://github.com/intel/ipmctl.git;protocol=https;branch=master_3_0;name=ipmctl; \
19 git://github.com/tianocore/edk2.git;protocol=https;name=edk2;destsuffix=${BB_GIT_DEFAULT_DESTSUFFIX}/edk2;branch=master \
20 file://0001-Ignore-STATIC_ASSERTs-and-NULL-define-for-os-and-ut-builds.patch;patchdir=edk2 \
21 file://0001-CMakeLists-disable-Werror.patch \
22"
23
24SRCREV_ipmctl = "a71f2fb1c90dd07f9862b71c789881132193e8f9"
25#tag edk2-stable202408
26SRCREV_edk2 = "b158dad150bf02879668f72ce306445250838201"
27SRCREV_FORMAT = "ipmctl_edk2"
28
29inherit cmake dos2unix
30
31DEPENDS = "ndctl pkgconfig-native"
32
33EXTRA_OECMAKE = "-DRELEASE=ON"
34
35do_configure:prepend() {
36 for dir in BaseTools MdeModulePkg MdePkg ShellPkg ; do
37 ln -sf edk2/${dir} ${S}
38 done
39}
40
41do_install:append() {
42 # Remove /var/log/ipmctl as anything created in /var/log will not be
43 # available when tmpfs is mounted at /var/volatile/log.
44 rm -rf ${D}${localstatedir}/log
45}