From a12a2f03466cdc1d639f3d10a94fc22e0609c67c Mon Sep 17 00:00:00 2001 From: Anuj Mittal Date: Tue, 26 Mar 2019 14:08:36 +0800 Subject: clang: build Intel common-clang and spirv Common clang is a thin wrapper library around clang. Common clang has OpenCL-oriented API and is capable to compile OpenCL C kernels to SPIR-V modules. This adds a bbappend to clang recipe from meta-clang to build the necessary components and moves it to dynamic layers so it's built only when clang-layer is included. Signed-off-by: Anuj Mittal --- conf/layer.conf | 4 + .../recipes-devtools/clang/clang_%.bbappend | 13 + ...L-Change-type-of-block-pointer-for-OpenCL.patch | 156 +++ ...rOpenCL-pass-to-handle-new-blocks-represn.patch | 1119 ++++++++++++++++++++ .../0001-dont-export-targets-for-binaries.patch | 66 ++ .../clang/files/0001-point-to-correct-clang.patch | 59 ++ ...plify-LLVM-IR-generated-for-OpenCL-blocks.patch | 294 +++++ .../0003-OpenCL-Fix-assertion-due-to-blocks.patch | 61 ++ .../clang/llvm-project-source.bbappend | 16 + 9 files changed, 1788 insertions(+) create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend diff --git a/conf/layer.conf b/conf/layer.conf index 3e75b0fe..4cea5500 100644 --- a/conf/layer.conf +++ b/conf/layer.conf @@ -20,5 +20,9 @@ LAYERRECOMMENDS_intel = "dpdk intel-qat" LAYERVERSION_intel = "5" LAYERSERIES_COMPAT_intel = "thud warrior" +BBFILES_DYNAMIC += " \ + clang-layer:${LAYERDIR}/dynamic-layers/clang-layer/*/*/*.bb \ + clang-layer:${LAYERDIR}/dynamic-layers/clang-layer/*/*/*.bbappend \ +" require ${LAYERDIR}/conf/include/maintainers.inc diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend new file mode 100644 index 00000000..f8d5a252 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/clang_%.bbappend @@ -0,0 +1,13 @@ +FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:" + +DEPENDS_append = " opencl-clang-native" +LLVM_TARGETS_TO_BUILD = "X86" + +do_install_append_intel-x86-common() { + DESTDIR=${D} ninja -v install-cmake-exports +} + +LIBCPLUSPLUS = "" + +# undefined reference to `__atomic_load' on i*86. +COMPATIBLE_HOST = '(x86_64).*-linux' diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch new file mode 100644 index 00000000..1c491402 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch @@ -0,0 +1,156 @@ +From 39a3ac0065c23d1e2d55dfd8792cc28a146a4307 Mon Sep 17 00:00:00 2001 +From: Alexey Bader +Date: Tue, 19 Feb 2019 15:19:06 +0000 +Subject: [PATCH 1/2] [OpenCL] Change type of block pointer for OpenCL + +Summary: + +For some reason OpenCL blocks in LLVM IR are represented as function pointers. +These pointers do not point to any real function and never get called. Actually +they point to some structure, which in turn contains pointer to the real block +invoke function. +This patch changes represntation of OpenCL blocks in LLVM IR from function +pointers to pointers to `%struct.__block_literal_generic`. +Such representation allows to avoid unnecessary bitcasts and simplifies +further processing (e.g. translation to SPIR-V ) of the module for targets +which do not support function pointers. + +Patch by: Alexey Sotkin. + +Reviewers: Anastasia, yaxunl, svenvh + +Reviewed By: Anastasia + +Subscribers: alexbatashev, cfe-commits + +Tags: #clang + +Differential Revision: https://reviews.llvm.org/D58277 + +git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354337 91177308-0d34-0410-b5e6-96231b3b80d8 + +Upstream-Status: Backport +[https://github.com/llvm-mirror/clang/commit/283f308bdb5893bab1f36791711346e746045f94] +Signed-off-by: Anuj Mittal +--- + lib/CodeGen/CodeGenTypes.cpp | 4 +++- + test/CodeGenOpenCL/blocks.cl | 18 ++++++++---------- + test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 18 +++++++++--------- + 3 files changed, 20 insertions(+), 20 deletions(-) + +diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp +index 2acf1ac..93b3ebf 100644 +--- a/lib/CodeGen/CodeGenTypes.cpp ++++ b/lib/CodeGen/CodeGenTypes.cpp +@@ -637,7 +637,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { + + case Type::BlockPointer: { + const QualType FTy = cast(Ty)->getPointeeType(); +- llvm::Type *PointeeType = ConvertTypeForMem(FTy); ++ llvm::Type *PointeeType = CGM.getLangOpts().OpenCL ++ ? CGM.getGenericBlockLiteralType() ++ : ConvertTypeForMem(FTy); + unsigned AS = Context.getTargetAddressSpace(FTy); + ResultType = llvm::PointerType::get(PointeeType, AS); + break; +diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl +index 675240c..19aacc3 100644 +--- a/test/CodeGenOpenCL/blocks.cl ++++ b/test/CodeGenOpenCL/blocks.cl +@@ -35,11 +35,10 @@ void foo(){ + // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3 + // SPIR: %[[i_value:.*]] = load i32, i32* %i + // SPIR: store i32 %[[i_value]], i32* %[[block_captured]], +- // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()* +- // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)* +- // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]], +- // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]] +- // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* ++ // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic* ++ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* ++ // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]], ++ // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]] + // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 + // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* + // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]] +@@ -50,11 +49,10 @@ void foo(){ + // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3 + // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i + // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]], +- // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)* +- // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()* +- // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]], +- // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]] +- // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic* ++ // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)* ++ // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic* ++ // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]], ++ // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]] + // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2 + // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8* + // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]] +diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +index 4732194..8445016 100644 +--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl ++++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +@@ -11,7 +11,7 @@ typedef struct {int a;} ndrange_t; + + // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself. + // COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) } +-// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*) ++// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*) + + // For anonymous blocks without captures, emit block literals as global variable. + // COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +@@ -77,9 +77,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { + // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue + // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke +- // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()* +- // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()* +- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* ++ // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic* ++ // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic* ++ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)* + // COMMON-LABEL: call i32 @__enqueue_kernel_basic( + // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), +@@ -95,8 +95,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { + // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)* + // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke +- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()* +- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* ++ // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic* ++ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)* + // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events + // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), +@@ -300,13 +300,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { + // Emits global block literal [[BLG8]] and invoke function [[INVG8]]. + // The full type of these expressions are long (and repeated elsewhere), so we + // capture it as part of the regex for convenience and clarity. +- // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A ++ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A + void (^const block_A)(void) = ^{ + return; + }; + + // Emits global block literal [[BLG9]] and invoke function [[INVG9]]. +- // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B ++ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B + void (^const block_B)(local void *) = ^(local void *a) { + return; + }; +@@ -346,7 +346,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke + // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue + // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags +- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)* ++ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)* + // COMMON-LABEL: call i32 @__enqueue_kernel_basic( + // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), +-- +1.8.3.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch new file mode 100644 index 00000000..4a528674 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch @@ -0,0 +1,1119 @@ +From 9ce0fe02fd6cda5fb29fbb0d5037a1798a810b8a Mon Sep 17 00:00:00 2001 +From: Alexey Sotkin +Date: Thu, 21 Feb 2019 17:14:36 +0300 +Subject: [PATCH 1/3] Update LowerOpenCL pass to handle new blocks + represntation in LLVM IR + +Upstream-Status: Backport +[https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/bd6ddfaf7232cd81c7f2fe9877e66f286731bd8e] +Signed-off-by: Anuj Mittal +--- + lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 413 ++++++++---------------------- + test/global_block.ll | 71 ++--- + test/literal-struct.ll | 31 ++- + test/transcoding/block_w_struct_return.ll | 47 ++-- + test/transcoding/enqueue_kernel.ll | 237 ++++++++++------- + 5 files changed, 317 insertions(+), 482 deletions(-) + +diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp +index 50e1838..b42a4ec 100644 +--- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp ++++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp +@@ -1,303 +1,110 @@ +-//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===// +-// +-// The LLVM/SPIRV Translator +-// +-// This file is distributed under the University of Illinois Open Source +-// License. See LICENSE.TXT for details. +-// +-// Copyright (c) 2018 Intel Corporation. All rights reserved. +-// +-// Permission is hereby granted, free of charge, to any person obtaining a +-// copy of this software and associated documentation files (the "Software"), +-// to deal with the Software without restriction, including without limitation +-// the rights to use, copy, modify, merge, publish, distribute, sublicense, +-// and/or sell copies of the Software, and to permit persons to whom the +-// Software is furnished to do so, subject to the following conditions: +-// +-// Redistributions of source code must retain the above copyright notice, +-// this list of conditions and the following disclaimers. +-// Redistributions in binary form must reproduce the above copyright notice, +-// this list of conditions and the following disclaimers in the documentation +-// and/or other materials provided with the distribution. +-// Neither the names of Intel Corporation, nor the names of its +-// contributors may be used to endorse or promote products derived from this +-// Software without specific prior written permission. +-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +-// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH +-// THE SOFTWARE. +-// +-//===----------------------------------------------------------------------===// +-// +-// SPIR-V specification doesn't allow function pointers, so SPIR-V translator +-// is designed to fail if a value with function type (except calls) is occured. +-// Currently there is only two cases, when function pointers are generating in +-// LLVM IR in OpenCL - block calls and device side enqueue built-in calls. +-// +-// In both cases values with function type used as intermediate representation +-// for block literal structure. +-// +-// This pass is designed to find such cases and simplify them to avoid any +-// function pointer types occurrences in LLVM IR in 4 steps. +-// +-// 1. Find all function pointer allocas, like +-// %block = alloca void () * +-// +-// Then find a single store to that alloca: +-// %blockLit = alloca <{ i32, i32, ...}>, align 4 +-// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()* +-// > store void ()* %0, void ()** %block, align 4 +-// +-// And replace the alloca users by new instructions which used stored value +-// %blockLit itself instead of function pointer alloca %block. +-// +-// 2. Find consecutive casts from block literal type to i8 addrspace(4)* +-// used function pointers as an intermediate type: +-// %0 = bitcast <{ i32, i32 }> %block to void() * +-// %1 = addrspacecast void() * %0 to i8 addrspace(4)* +-// And simplify them: +-// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)* +-// +-// 3. Find all unused instructions with function pointer type occured after +-// pp.1-2 and remove them. +-// +-// 4. Find unused globals with function pointer type, like +-// @block = constant void ()* +-// bitcast ({ i32, i32 }* @__block_literal_global to void ()* +-// +-// And remove them. +-// +-//===----------------------------------------------------------------------===// +-#define DEBUG_TYPE "spv-lower-ocl-blocks" +- +-#include "OCLUtil.h" +-#include "SPIRVInternal.h" +- +-#include "llvm/ADT/SetVector.h" +-#include "llvm/Analysis/ValueTracking.h" +-#include "llvm/IR/GlobalVariable.h" +-#include "llvm/IR/InstIterator.h" +-#include "llvm/IR/Module.h" +-#include "llvm/Pass.h" +-#include "llvm/PassSupport.h" +-#include "llvm/Support/Casting.h" +- +-using namespace llvm; +- +-namespace { +- +-static void +-removeUnusedFunctionPtrInst(Instruction *I, +- SmallSetVector &FuncPtrInsts) { +- for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) { +- Instruction *OpI = dyn_cast(I->getOperand(OpIdx)); +- I->setOperand(OpIdx, nullptr); +- if (OpI && OpI != I && OpI->user_empty()) +- FuncPtrInsts.insert(OpI); +- } +- I->eraseFromParent(); +-} +- +-static bool isFuncPtrAlloca(const AllocaInst *AI) { +- auto *ET = dyn_cast(AI->getAllocatedType()); +- return ET && ET->getElementType()->isFunctionTy(); +-} +- +-static bool hasFuncPtrType(const Value *V) { +- auto *PT = dyn_cast(V->getType()); +- return PT && PT->getElementType()->isFunctionTy(); +-} +- +-static bool isFuncPtrInst(const Instruction *I) { +- if (auto *AI = dyn_cast(I)) +- return isFuncPtrAlloca(AI); +- +- for (auto &Op : I->operands()) { +- if (auto *AI = dyn_cast(Op)) +- return isFuncPtrAlloca(AI); +- +- auto *OpI = dyn_cast(&Op); +- if (OpI && OpI != I && hasFuncPtrType(OpI)) +- return true; +- } +- return false; +-} +- +-static StoreInst *findSingleStore(AllocaInst *AI) { +- StoreInst *Store = nullptr; +- for (auto *U : AI->users()) { +- if (!isa(U)) +- continue; // not a store +- if (Store) +- return nullptr; // there are more than one stores +- Store = dyn_cast(U); +- } +- return Store; +-} +- +-static void fixFunctionPtrAllocaUsers(AllocaInst *AI) { +- // Find and remove a single store to alloca +- auto *SingleStore = findSingleStore(AI); +- assert(SingleStore && "More than one store to the function pointer alloca"); +- auto *StoredVal = SingleStore->getValueOperand(); +- SingleStore->eraseFromParent(); +- +- // Find loads from the alloca and replace thier users +- for (auto *U : AI->users()) { +- auto *LI = dyn_cast(U); +- if (!LI) +- continue; +- +- for (auto *U : LI->users()) { +- auto *UInst = cast(U); +- auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast( +- StoredVal, UInst->getType(), "", UInst); +- UInst->replaceAllUsesWith(Cast); +- } +- } +-} +- +-static int getBlockLiteralIdx(const Function &F) { +- StringRef FName = F.getName(); +- if (isEnqueueKernelBI(FName)) +- return FName.contains("events") ? 7 : 4; +- if (isKernelQueryBI(FName)) +- return FName.contains("for_ndrange") ? 2 : 1; +- if (FName.startswith("__") && FName.contains("_block_invoke")) +- return F.hasStructRetAttr() ? 1 : 0; +- +- return -1; // No block literal argument +-} +- +-static bool hasBlockLiteralArg(const Function &F) { +- return getBlockLiteralIdx(F) != -1; +-} +- +-static bool simplifyFunctionPtrCasts(Function &F) { +- bool Changed = false; +- int BlockLiteralIdx = getBlockLiteralIdx(F); +- for (auto *U : F.users()) { +- auto *Call = dyn_cast(U); +- if (!Call) +- continue; +- if (Call->getFunction()->getName() == F.getName().str() + "_kernel") +- continue; // Skip block invoke function calls inside block invoke kernels +- +- const DataLayout &DL = F.getParent()->getDataLayout(); +- auto *BlockLiteral = Call->getOperand(BlockLiteralIdx); +- auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL); +- if (isa(BlockLiteralVal)) +- continue; // nothing to do with globals +- +- auto *BlockLiteralAlloca = cast(BlockLiteralVal); +- assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() && +- "Function type shouldn't be there"); +- +- auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast( +- BlockLiteralAlloca, BlockLiteral->getType(), "", Call); +- BlockLiteral->replaceAllUsesWith(NewBlockLiteral); +- Changed |= true; +- } +- return Changed; +-} +- +-static void +-findFunctionPtrAllocas(Module &M, +- SmallVectorImpl &FuncPtrAllocas) { +- for (auto &F : M) { +- if (F.isDeclaration()) +- continue; +- for (auto &I : instructions(F)) { +- auto *AI = dyn_cast(&I); +- if (!AI || !isFuncPtrAlloca(AI)) +- continue; +- FuncPtrAllocas.push_back(AI); +- } +- } +-} +- +-static void +-findUnusedFunctionPtrInsts(Module &M, +- SmallSetVector &FuncPtrInsts) { +- for (auto &F : M) { +- if (F.isDeclaration()) +- continue; +- for (auto &I : instructions(F)) +- if (I.user_empty() && isFuncPtrInst(&I)) +- FuncPtrInsts.insert(&I); +- } +-} +- +-static void +-findUnusedFunctionPtrGlbs(Module &M, +- SmallVectorImpl &FuncPtrGlbs) { +- for (auto &GV : M.globals()) { +- if (!GV.user_empty()) +- continue; +- auto *GVType = dyn_cast(GV.getType()->getElementType()); +- if (GVType && GVType->getElementType()->isFunctionTy()) +- FuncPtrGlbs.push_back(&GV); +- } +-} +- +-class SPIRVLowerOCLBlocks : public ModulePass { +- +-public: +- SPIRVLowerOCLBlocks() : ModulePass(ID) {} +- +- bool runOnModule(Module &M) { +- bool Changed = false; +- +- // 1. Find function pointer allocas and fix their users +- SmallVector FuncPtrAllocas; +- findFunctionPtrAllocas(M, FuncPtrAllocas); +- +- Changed |= !FuncPtrAllocas.empty(); +- for (auto *AI : FuncPtrAllocas) +- fixFunctionPtrAllocaUsers(AI); +- +- // 2. Simplify consecutive casts which use function pointer types +- for (auto &F : M) +- if (hasBlockLiteralArg(F)) +- Changed |= simplifyFunctionPtrCasts(F); +- +- // 3. Cleanup unused instructions with function pointer type +- // which are occured after pp. 1-2 +- SmallSetVector FuncPtrInsts; +- findUnusedFunctionPtrInsts(M, FuncPtrInsts); +- +- Changed |= !FuncPtrInsts.empty(); +- while (!FuncPtrInsts.empty()) { +- Instruction *I = FuncPtrInsts.pop_back_val(); +- removeUnusedFunctionPtrInst(I, FuncPtrInsts); +- } +- +- // 4. Find and remove unused global variables with function pointer type +- SmallVector FuncPtrGlbs; +- findUnusedFunctionPtrGlbs(M, FuncPtrGlbs); +- +- Changed |= !FuncPtrGlbs.empty(); +- for (auto *GV : FuncPtrGlbs) +- GV->eraseFromParent(); +- +- return Changed; +- } +- +- static char ID; +-}; // class SPIRVLowerOCLBlocks +- +-char SPIRVLowerOCLBlocks::ID = 0; +- +-} // namespace +- +-INITIALIZE_PASS( +- SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks", +- "Remove function pointers occured in case of using OpenCL blocks", false, +- false) +- +-llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() { +- return new SPIRVLowerOCLBlocks(); +-} ++//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===// ++// ++// The LLVM/SPIRV Translator ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++// Copyright (c) 2018 Intel Corporation. All rights reserved. ++// ++// Permission is hereby granted, free of charge, to any person obtaining a ++// copy of this software and associated documentation files (the "Software"), ++// to deal with the Software without restriction, including without limitation ++// the rights to use, copy, modify, merge, publish, distribute, sublicense, ++// and/or sell copies of the Software, and to permit persons to whom the ++// Software is furnished to do so, subject to the following conditions: ++// ++// Redistributions of source code must retain the above copyright notice, ++// this list of conditions and the following disclaimers. ++// Redistributions in binary form must reproduce the above copyright notice, ++// this list of conditions and the following disclaimers in the documentation ++// and/or other materials provided with the distribution. ++// Neither the names of Intel Corporation, nor the names of its ++// contributors may be used to endorse or promote products derived from this ++// Software without specific prior written permission. ++// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH ++// THE SOFTWARE. ++// ++//===----------------------------------------------------------------------===// ++// ++// SPIR-V specification doesn't allow function pointers, so SPIR-V translator ++// is designed to fail if a value with function type (except calls) is occured. ++// Currently there is only two cases, when function pointers are generating in ++// LLVM IR in OpenCL - block calls and device side enqueue built-in calls. ++// ++// In both cases values with function type used as intermediate representation ++// for block literal structure. ++// ++// In LLVM IR produced by clang, blocks are represented with the following ++// structure: ++// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } ++// Pointers to block invoke functions are stored in the third field. Clang ++// replaces inderect function calls in all cases except if block is passed as a ++// function argument. Note that it is somewhat unclear if the OpenCL C spec ++// should allow passing blocks as function argumernts. This pass is not supposed ++// to work correctly with such functions. ++// Clang though has to store function pointers to this structure. Purpose of ++// this pass is to replace store of function pointers(not allowed in SPIR-V) ++// with null pointers. ++// ++//===----------------------------------------------------------------------===// ++#define DEBUG_TYPE "spv-lower-ocl-blocks" ++ ++#include "SPIRVInternal.h" ++ ++#include "llvm/IR/Module.h" ++#include "llvm/Pass.h" ++#include "llvm/Support/Regex.h" ++ ++using namespace llvm; ++ ++namespace { ++ ++static bool isBlockInvoke(Function &F) { ++ static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$"); ++ return BlockInvokeRegex.match(F.getName()); ++} ++ ++class SPIRVLowerOCLBlocks : public ModulePass { ++ ++public: ++ SPIRVLowerOCLBlocks() : ModulePass(ID) {} ++ ++ bool runOnModule(Module &M) { ++ bool Changed = false; ++ for (Function &F : M) { ++ if (!isBlockInvoke(F)) ++ continue; ++ for (User *U : F.users()) { ++ if (!isa(U)) ++ continue; ++ Constant *Null = Constant::getNullValue(U->getType()); ++ if (U != Null) { ++ U->replaceAllUsesWith(Null); ++ Changed = true; ++ } ++ } ++ } ++ return Changed; ++ } ++ ++ static char ID; ++}; ++ ++char SPIRVLowerOCLBlocks::ID = 0; ++ ++} // namespace ++ ++INITIALIZE_PASS( ++ SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks", ++ "Remove function pointers occured in case of using OpenCL blocks", false, ++ false) ++ ++llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() { ++ return new SPIRVLowerOCLBlocks(); ++} +diff --git a/test/global_block.ll b/test/global_block.ll +index a9267d8..efb4cf3 100644 +--- a/test/global_block.ll ++++ b/test/global_block.ll +@@ -16,7 +16,7 @@ + ; RUN: llvm-spirv %t.bc -o %t.spv + ; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM + +-target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" ++target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" + target triple = "spir-unknown-unknown" + + ; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke" +@@ -26,71 +26,56 @@ target triple = "spir-unknown-unknown" + ; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]] + ; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] [[int]] + +-;; This variable is not needed in SPIRV +-; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1 +-; CHECK-LLVM-NOT: @block_kernel.b1 +-@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) addrspace(4)*), align 8 ++%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } + +-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 ++@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4 ++@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 + +-; Function Attrs: convergent nounwind +-define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { ++; Function Attrs: convergent noinline nounwind optnone ++define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { + entry: +- %res.addr = alloca i32 addrspace(1)*, align 8 +- store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa !10 +- ++ %res.addr = alloca i32 addrspace(1)*, align 4 ++ store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4 + ; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} [[five]] + ; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 5) +- %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2 +- +- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa !10 +- store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14 ++ %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2 ++ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4 ++ store i32 %call, i32 addrspace(1)* %0, align 4 + ret void + } + +-; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]] ++; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]] + ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}} + ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}} + ; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 %{{.*}}) +-; Function Attrs: convergent nounwind ++; Function Attrs: convergent noinline nounwind optnone + define internal spir_func i32 @_block_invoke(i8 addrspace(4)* %.block_descriptor, i32 %i) #1 { + entry: +- %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8 ++ %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 + %i.addr = alloca i32, align 4 +- store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* +- store i32 %i, i32* %i.addr, align 4, !tbaa !14 +- %0 = load i32, i32* %i.addr, align 4, !tbaa !14 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 ++ store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* ++ store i32 %i, i32* %i.addr, align 4 ++ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 ++ %0 = load i32, i32* %i.addr, align 4 + %add = add nsw i32 %0, 1 + ret i32 %add + } + +-attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +-attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #2 = { convergent } + + !llvm.module.flags = !{!0} +-!opencl.enable.FP_CONTRACT = !{} + !opencl.ocl.version = !{!1} + !opencl.spir.version = !{!1} +-!opencl.used.extensions = !{!2} +-!opencl.used.optional.core.features = !{!2} +-!opencl.compiler.options = !{!2} +-!llvm.ident = !{!3} ++!llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 2, i32 0} +-!2 = !{} +-!3 = !{!"clang version 7.0.0"} +-!4 = !{i32 1} +-!5 = !{!"none"} +-!6 = !{!"int*"} +-!7 = !{!""} +-!8 = !{i1 false} +-!9 = !{i32 0} +-!10 = !{!11, !11, i64 0} +-!11 = !{!"any pointer", !12, i64 0} +-!12 = !{!"omnipotent char", !13, i64 0} +-!13 = !{!"Simple C/C++ TBAA"} +-!14 = !{!15, !15, i64 0} +-!15 = !{!"int", !12, i64 0} ++!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} ++!3 = !{i32 1} ++!4 = !{!"none"} ++!5 = !{!"int*"} ++!6 = !{!""} +diff --git a/test/literal-struct.ll b/test/literal-struct.ll +index c52170a..52a731a 100644 +--- a/test/literal-struct.ll ++++ b/test/literal-struct.ll +@@ -2,7 +2,7 @@ + ; structs, i.e. structs whose type has no name. Typicaly clang generate such + ; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with + ; the following command: +-; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll ++; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll + + ; literal-struct.cl: + ; void foo() +@@ -14,25 +14,28 @@ + ; RUN: llvm-as < %s | llvm-spirv -spirv-text -o %t + ; RUN: FileCheck < %t %s + +-; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0 +-; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}} ++; CHECK: TypeInt [[Int:[0-9]+]] 32 0 ++; CHECK: TypeInt [[Int8:[0-9]+]] 8 0 ++; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]] ++; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]] + + target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" + target triple = "spir" + +-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 ++%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } ++ ++@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 + ; CHECK: ConstantComposite [[StructType]] + +-; This is artificial case is added to cover ConstantNull instrucitions with TypeStruct. +-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } zeroinitializer, align 4 ++@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4 + ; CHECK: ConstantNull [[StructType]] + + ; Function Attrs: convergent noinline nounwind optnone + define spir_func void @foo() #0 { + entry: +- %myBlock = alloca void () addrspace(4)*, align 4 +- store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4 +- call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1 ++ %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4 ++ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4 ++ call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1 + ret void + } + +@@ -40,14 +43,14 @@ entry: + define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) #0 { + entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 +- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* +- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* ++ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 + ret void + } + +-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { convergent } + + !llvm.module.flags = !{!0} +@@ -57,4 +60,4 @@ attributes #1 = { convergent } + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 2, i32 0} +-!2 = !{!"clang version 8.0.0 "} ++!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} +diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll +index 76e29f0..df89b13 100644 +--- a/test/transcoding/block_w_struct_return.ll ++++ b/test/transcoding/block_w_struct_return.ll +@@ -16,6 +16,8 @@ + ; res[tid] = kernelBlock(aa).a - 6; + ; } + ++; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll ++ + ; RUN: llvm-as %s -o %t.bc + ; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt + ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV +@@ -27,12 +29,14 @@ + ; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke" + + ; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32 ++; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8 ++; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]] + ; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]] + ; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]] + + ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7 + ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7 +-; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}} ++; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}} + ; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] [[StructRet]] [[BlockLit]] [[StructArg]] + + ; CHECK-LLVM: %[[StructA:.*]] = type { i32 } +@@ -41,20 +45,21 @@ + target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" + target triple = "spir64-unknown-unknown" + ++%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } + %struct.A = type { i32 } + +-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 ++@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8 + + ; Function Attrs: convergent noinline nounwind optnone +-define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible !8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location !7 { ++define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { + entry: + %res.addr = alloca i32 addrspace(1)*, align 8 +- %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8 ++ %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 8 + %tid = alloca i64, align 8 + %aa = alloca %struct.A, align 4 + %tmp = alloca %struct.A, align 4 + store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8 +- store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void (%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void (%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) addrspace(4)** %kernelBlock, align 8 ++ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8 + %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4 + store i64 %call, i64* %tid, align 8 + %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8 +@@ -63,7 +68,7 @@ entry: + store i32 -1, i32 addrspace(1)* %arrayidx, align 4 + %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0 + store i32 5, i32* %a, align 4 +- call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5 ++ call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5 + %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0 + %2 = load i32, i32* %a1, align 4 + %sub = sub nsw i32 %2, 6 +@@ -78,10 +83,10 @@ entry: + define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval align 4 %a) #1 { + entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8 +- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* +- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 8 ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* ++ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8 + %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0 + store i32 6, i32* %a1, align 4 + %0 = bitcast %struct.A* %agg.result to i8* +@@ -96,30 +101,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r + ; Function Attrs: convergent nounwind readnone + declare spir_func i64 @_Z13get_global_idj(i32) #3 + +-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +-attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #2 = { argmemonly nounwind } + attributes #3 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #4 = { convergent nounwind readnone } + attributes #5 = { convergent } + + !llvm.module.flags = !{!0} +-!opencl.enable.FP_CONTRACT = !{} + !opencl.ocl.version = !{!1} + !opencl.spir.version = !{!1} +-!opencl.used.extensions = !{!2} +-!opencl.used.optional.core.features = !{!2} +-!opencl.compiler.options = !{!2} +-!llvm.ident = !{!3} ++!llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 2, i32 0} +-!2 = !{} +-!3 = !{!"clang version 7.0.0"} +-!4 = !{i32 1} +-!5 = !{!"none"} +-!6 = !{!"int*"} +-!7 = !{!""} +-!8 = !{i1 false} +-!9 = !{i32 0} +- ++!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} ++!3 = !{i32 1} ++!4 = !{!"none"} ++!5 = !{!"int*"} ++!6 = !{!""} +diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll +index 0d29c71..435871d 100644 +--- a/test/transcoding/enqueue_kernel.ll ++++ b/test/transcoding/enqueue_kernel.ll +@@ -51,11 +51,12 @@ + ; ModuleID = 'enqueue_kernel.cl' + source_filename = "enqueue_kernel.cl" + target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +-target triple = "spir-unknown-unknown" ++target triple = "spir" + + %opencl.queue_t = type opaque + %struct.ndrange_t = type { i32 } + %opencl.clk_event_t = type opaque ++%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } + + ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel" + ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel" +@@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown" + + ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32 + ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8 +-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8 + ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0 +-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17 ++; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21 + ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2 +-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20 +-; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]] ++; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8 ++; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24 + + ; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}} ++; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]] ++; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]] + ; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]] + ; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]] +-; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]] + ; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]] + ; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] + ; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] + ; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] + ; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]] + +-; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32 } +-; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }> +-; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> +-; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32 }> ++; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32, i8 addrspace(4)* } ++; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> ++; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> ++; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)* }> + +-; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4 +-; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4 ++; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 ++; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 + +-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 +-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 ++@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4 ++@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4 + + ; Function Attrs: convergent noinline nounwind optnone +-define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { ++define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { + entry: ++ %a.addr = alloca i32 addrspace(1)*, align 4 ++ %b.addr = alloca i32 addrspace(1)*, align 4 ++ %i.addr = alloca i32, align 4 ++ %c0.addr = alloca i8, align 1 + %default_queue = alloca %opencl.queue_t*, align 4 + %flags = alloca i32, align 4 + %ndrange = alloca %struct.ndrange_t, align 4 + %clk_event = alloca %opencl.clk_event_t*, align 4 + %event_wait_list = alloca %opencl.clk_event_t*, align 4 + %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4 +- %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4 +- %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4 ++ %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4 ++ %tmp = alloca %struct.ndrange_t, align 4 ++ %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4 ++ %tmp4 = alloca %struct.ndrange_t, align 4 + %c = alloca i8, align 1 ++ %tmp11 = alloca %struct.ndrange_t, align 4 ++ %block_sizes = alloca [1 x i32], align 4 ++ %tmp12 = alloca %struct.ndrange_t, align 4 ++ %block_sizes13 = alloca [3 x i32], align 4 ++ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4 ++ store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4 ++ store i32 %i, i32* %i.addr, align 4 ++ store i8 %c0, i8* %c0.addr, align 1 + store i32 0, i32* %flags, align 4 + %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 + %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4 + store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4 + %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 + %2 = load i32, i32* %flags, align 4 +- %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0 +- store i32 17, i32* %block.size, align 4 +- %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1 ++ %3 = bitcast %struct.ndrange_t* %tmp to i8* ++ %4 = bitcast %struct.ndrange_t* %ndrange to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false) ++ %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0 ++ store i32 21, i32* %block.size, align 4 ++ %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1 + store i32 4, i32* %block.align, align 4 +- %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2 +- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4 +- %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3 +- store i32 %i, i32* %block.captured1, align 4 +- %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4 +- store i8 %c0, i8* %block.captured2, align 4 +- %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()* +- %4 = addrspacecast void ()* %3 to i8 addrspace(4)* ++ %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2 ++ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4 ++ %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3 ++ %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4 ++ store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4 ++ %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4 ++ %6 = load i32, i32* %i.addr, align 4 ++ store i32 %6, i32* %block.captured1, align 4 ++ %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5 ++ %7 = load i8, i8* %c0.addr, align 1 ++ store i8 %7, i8* %block.captured2, align 4 ++ %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic* ++ %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)* + + ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]] + ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} + ; [[ConstInt0]] [[EventNull]] [[EventNull]] + ; [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]] + +-; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 addrspace(4)* ++; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic* ++; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)* + ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)* +-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]]) +- +- %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4) +- %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)* +- %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* +- %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0 +- store i32 20, i32* %block.size5, align 4 +- %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1 ++; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]]) ++ ++ %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9) ++ %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 ++ %12 = load i32, i32* %flags, align 4 ++ %13 = bitcast %struct.ndrange_t* %tmp4 to i8* ++ %14 = bitcast %struct.ndrange_t* %ndrange to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false) ++ %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)* ++ %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* ++ %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0 ++ store i32 24, i32* %block.size5, align 4 ++ %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1 + store i32 4, i32* %block.align6, align 4 +- %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2 +- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4 +- %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3 +- store i32 %i, i32* %block.captured8, align 4 +- %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4 +- store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4 +- %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to void ()* +- %9 = addrspacecast void ()* %8 to i8 addrspace(4)* ++ %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2 ++ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4 ++ %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3 ++ %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4 ++ store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4 ++ %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4 ++ %18 = load i32, i32* %i.addr, align 4 ++ store i32 %18, i32* %block.captured9, align 4 ++ %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5 ++ %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4 ++ store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4 ++ %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic* ++ %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)* ++ + + ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]] + ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]] +@@ -158,16 +193,24 @@ entry: + ; [[ConstInt2]] [[Event1]] [[Event2]] + ; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]] + +-; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 addrspace(4)* ++; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to %struct.__opencl_block_literal_generic* ++; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4) + ; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)* +-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]]) +- +- %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9) +- %11 = alloca [1 x i32] +- %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0 +- %13 = load i8, i8* %c, align 1 +- %14 = zext i8 %13 to i32 +- store i32 %14, i32* %12, align 4 ++; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]]) ++ ++ %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21) ++ %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 ++ %24 = load i32, i32* %flags, align 4 ++ %25 = bitcast %struct.ndrange_t* %tmp11 to i8* ++ %26 = bitcast %struct.ndrange_t* %ndrange to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false) ++ %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 ++ %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)* ++ %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* ++ %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0 ++ %30 = load i8, i8* %c, align 1 ++ %31 = zext i8 %30 to i32 ++ store i32 %31, i32* %29, align 4 + + ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]] + ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]] +@@ -182,14 +225,18 @@ entry: + ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)* + ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}}) + +- %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %12) +- %16 = alloca [3 x i32] +- %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0 +- store i32 1, i32* %17, align 4 +- %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1 +- store i32 2, i32* %18, align 4 +- %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2 +- store i32 4, i32* %19, align 4 ++ %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29) ++ %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 ++ %34 = load i32, i32* %flags, align 4 ++ %35 = bitcast %struct.ndrange_t* %tmp12 to i8* ++ %36 = bitcast %struct.ndrange_t* %ndrange to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false) ++ %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0 ++ store i32 1, i32* %37, align 4 ++ %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1 ++ store i32 2, i32* %38, align 4 ++ %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2 ++ store i32 4, i32* %39, align 4 + + ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]] + ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]] +@@ -206,24 +253,27 @@ entry: + ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)* + ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}}) + +- %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17) ++ %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37) + ret void + } + ++; Function Attrs: argmemonly nounwind ++declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1 ++ + ; Function Attrs: convergent noinline nounwind optnone + define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #2 { + entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 +- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* +- store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4 +- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4 ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* ++ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4 ++ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5 + %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4 + %conv = sext i8 %0 to i32 +- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 2 ++ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3 + %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4 +- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3 ++ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4 + %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2 + store i32 %conv, i32 addrspace(1)* %arrayidx, align 4 +@@ -243,19 +293,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i + define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 { + entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 +- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* +- store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4 +- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* ++ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4 ++ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5 + %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4 +- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 ++ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 + %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1 + %2 = load i32, i32 addrspace(1)* %arrayidx, align 4 +- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2 ++ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 + %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4 +- %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 ++ %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 + %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4 + %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4 + store i32 %2, i32 addrspace(1)* %arrayidx4, align 4 +@@ -276,11 +326,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac + entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 + %p.addr = alloca i8 addrspace(3)*, align 4 +- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* + store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4 +- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 ++ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 + ret void + } + +@@ -300,13 +350,13 @@ entry: + %p1.addr = alloca i8 addrspace(3)*, align 4 + %p2.addr = alloca i8 addrspace(3)*, align 4 + %p3.addr = alloca i8 addrspace(3)*, align 4 +- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* + store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4 + store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4 + store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4 +- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 ++ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 + ret void + } + +@@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*, + ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*) + ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*) + +-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { argmemonly nounwind } +-attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #3 = { nounwind } + + !llvm.module.flags = !{!0} +-!opencl.enable.FP_CONTRACT = !{} + !opencl.ocl.version = !{!1} + !opencl.spir.version = !{!1} +-!opencl.used.extensions = !{!2} +-!opencl.used.optional.core.features = !{!2} +-!opencl.compiler.options = !{!2} +-!llvm.ident = !{!3} ++!llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 2, i32 0} +-!2 = !{} +-!3 = !{!"clang version 7.0.0"} +-!4 = !{i32 1, i32 1, i32 0, i32 0} +-!5 = !{!"none", !"none", !"none", !"none"} +-!6 = !{!"int*", !"int*", !"int", !"char"} +-!7 = !{!"", !"", !"", !""} +-!8 = !{i1 false, i1 false, i1 false, i1 false} +-!9 = !{i32 0, i32 0, i32 0, i32 0} ++!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} ++!3 = !{i32 1, i32 1, i32 0, i32 0} ++!4 = !{!"none", !"none", !"none", !"none"} ++!5 = !{!"int*", !"int*", !"int", !"char"} ++!6 = !{!"", !"", !"", !""} +-- +1.8.3.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch new file mode 100644 index 00000000..9d25bbad --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-dont-export-targets-for-binaries.patch @@ -0,0 +1,66 @@ +From 7bbd0058362ac3bb5edd7a82d43e1785810776b3 Mon Sep 17 00:00:00 2001 +From: Anuj Mittal +Date: Fri, 29 Mar 2019 08:56:53 +0800 +Subject: [PATCH] dont export targets for binaries + +The projects using LLVM cmake modules look for target binaries in +sysroot as a result which isn't desirable in this case and isn't needed +either. + +Upstream-Status: Inappropriate [cross-compile specific] + +Signed-off-by: Anuj Mittal +--- + llvm/cmake/modules/AddLLVM.cmake | 9 --------- + llvm/cmake/modules/TableGen.cmake | 6 ------ + 2 files changed, 15 deletions(-) + +diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake +index 0df6845..b79f4fa 100644 +--- a/llvm/cmake/modules/AddLLVM.cmake ++++ b/llvm/cmake/modules/AddLLVM.cmake +@@ -866,12 +866,6 @@ macro(add_llvm_tool name) + + if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + if( LLVM_BUILD_TOOLS ) +- if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR +- NOT LLVM_DISTRIBUTION_COMPONENTS) +- set(export_to_llvmexports EXPORT LLVMExports) +- set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True) +- endif() +- + install(TARGETS ${name} + ${export_to_llvmexports} + RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR} +@@ -884,9 +878,6 @@ macro(add_llvm_tool name) + endif() + endif() + endif() +- if( LLVM_BUILD_TOOLS ) +- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) +- endif() + set_target_properties(${name} PROPERTIES FOLDER "Tools") + endmacro(add_llvm_tool name) + +diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake +index 3c84ae7..141219f 100644 +--- a/llvm/cmake/modules/TableGen.cmake ++++ b/llvm/cmake/modules/TableGen.cmake +@@ -164,14 +164,8 @@ macro(add_tablegen target project) + endif() + + if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY) +- if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR +- NOT LLVM_DISTRIBUTION_COMPONENTS) +- set(export_to_llvmexports EXPORT LLVMExports) +- endif() +- + install(TARGETS ${target} + ${export_to_llvmexports} + RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR}) + endif() +- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${target}) + endmacro() +-- +2.7.4 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch new file mode 100644 index 00000000..0dfc537b --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-point-to-correct-clang.patch @@ -0,0 +1,59 @@ +From 6c33fb58869ffb17106047c45ab8d3856966eaf7 Mon Sep 17 00:00:00 2001 +From: Anuj Mittal +Date: Tue, 26 Mar 2019 14:11:29 +0800 +Subject: [PATCH] point to correct clang project and tblgen + +Point to correct path for clang project as per the way we unpack. Also +let llvm-tblgen path be passed from recipe itself. + +Also since we're going to do the patching ourselves, no need to look for +git through cmake. + +Upstream-Status: Inappropriate [OE specific] +--- + CMakeLists.txt | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 174133b..c769f08 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -53,7 +53,7 @@ endif(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + include(AddLLVM) + include(TableGen) + +-find_package(Git REQUIRED) ++#find_package(Git REQUIRED) + + if (NOT WIN32) + add_subdirectory( linux_linker ) +@@ -80,7 +80,7 @@ set(TARGET_NAME ${COMMON_CLANG_LIBRARY_NAME}${BUILD_PLATFORM} ) + + if(NOT USE_PREBUILT_LLVM) + set(TARGET_BRANCH "ocl-open-80") +- set(CLANG_SOURCE_DIR ${LLVM_SOURCE_DIR}/tools/clang) ++ set(CLANG_SOURCE_DIR ${LLVM_SOURCE_DIR}/../clang) + set(CLANG_BASE_REVISION a03da8be08a208122e292016cb6cea1f30229677) + + set(SPIRV_SOURCE_DIR ${LLVM_SOURCE_DIR}/projects/llvm-spirv) +@@ -102,7 +102,7 @@ endif(NOT USE_PREBUILT_LLVM) + set (COMPILE_OPTIONS_TD opencl_clang_options.td) + set (COMPILE_OPTIONS_INC opencl_clang_options.inc) + +-set(LLVM_TABLEGEN_EXE "llvm-tblgen") ++#set(LLVM_TABLEGEN_EXE "llvm-tblgen") + set(LLVM_TARGET_DEFINITIONS ${COMPILE_OPTIONS_TD}) + if(USE_PREBUILT_LLVM) + set(TABLEGEN_ADDITIONAL -I ${LLVM_INCLUDE_DIRS}) +@@ -153,7 +153,7 @@ endif() + + if(NOT USE_PREBUILT_LLVM) + set(CLANG_BINARY_DIR ${LLVM_BINARY_DIR}/tools/clang/) +- set(CLANG_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}/tools/clang/) ++ set(CLANG_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}/../clang/) + include_directories( + ${CLANG_BINARY_DIR}/include # for tablegened includes + ${CLANG_SOURCE_DIR}/include # for basic headers +-- +2.19.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch new file mode 100644 index 00000000..2e935a13 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch @@ -0,0 +1,294 @@ +From c94ec28600255098ffb9d73d1b386a7c8a535590 Mon Sep 17 00:00:00 2001 +From: Andrew Savonichev +Date: Thu, 21 Feb 2019 11:02:10 +0000 +Subject: [PATCH 2/2] [OpenCL] Simplify LLVM IR generated for OpenCL blocks + +Summary: +Emit direct call of block invoke functions when possible, i.e. in case the +block is not passed as a function argument. +Also doing some refactoring of `CodeGenFunction::EmitBlockCallExpr()` + +Reviewers: Anastasia, yaxunl, svenvh + +Reviewed By: Anastasia + +Subscribers: cfe-commits + +Tags: #clang + +Differential Revision: https://reviews.llvm.org/D58388 + +git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354568 91177308-0d34-0410-b5e6-96231b3b80d8 + +Upstream-Status: Backport +[https://github.com/llvm-mirror/clang/commit/eae71f8d05ce550c4e2595c9b7082cc2c7882c58] +Signed-off-by: Anuj Mittal +--- + lib/CodeGen/CGBlocks.cpp | 77 +++++++++++++------------- + lib/CodeGen/CGOpenCLRuntime.cpp | 30 +++++++--- + lib/CodeGen/CGOpenCLRuntime.h | 4 ++ + test/CodeGenOpenCL/blocks.cl | 10 +--- + test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 34 +++++++++--- + 5 files changed, 91 insertions(+), 64 deletions(-) + +diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp +index fa3c3ee..10a0238 100644 +--- a/lib/CodeGen/CGBlocks.cpp ++++ b/lib/CodeGen/CGBlocks.cpp +@@ -1261,52 +1261,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue) { + const BlockPointerType *BPT = + E->getCallee()->getType()->getAs(); +- + llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); +- +- // Get a pointer to the generic block literal. +- // For OpenCL we generate generic AS void ptr to be able to reuse the same +- // block definition for blocks with captures generated as private AS local +- // variables and without captures generated as global AS program scope +- // variables. +- unsigned AddrSpace = 0; +- if (getLangOpts().OpenCL) +- AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); +- +- llvm::Type *BlockLiteralTy = +- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); +- +- // Bitcast the callee to a block literal. +- BlockPtr = +- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); +- +- // Get the function pointer from the literal. +- llvm::Value *FuncPtr = +- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, +- CGM.getLangOpts().OpenCL ? 2 : 3); +- +- // Add the block literal. ++ llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType(); ++ llvm::Value *Func = nullptr; ++ QualType FnType = BPT->getPointeeType(); ++ ASTContext &Ctx = getContext(); + CallArgList Args; + +- QualType VoidPtrQualTy = getContext().VoidPtrTy; +- llvm::Type *GenericVoidPtrTy = VoidPtrTy; + if (getLangOpts().OpenCL) { +- GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType(); +- VoidPtrQualTy = +- getContext().getPointerType(getContext().getAddrSpaceQualType( +- getContext().VoidTy, LangAS::opencl_generic)); +- } +- +- BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy); +- Args.add(RValue::get(BlockPtr), VoidPtrQualTy); +- +- QualType FnType = BPT->getPointeeType(); ++ // For OpenCL, BlockPtr is already casted to generic block literal. ++ ++ // First argument of a block call is a generic block literal casted to ++ // generic void pointer, i.e. i8 addrspace(4)* ++ llvm::Value *BlockDescriptor = Builder.CreatePointerCast( ++ BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType()); ++ QualType VoidPtrQualTy = Ctx.getPointerType( ++ Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic)); ++ Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy); ++ // And the rest of the arguments. ++ EmitCallArgs(Args, FnType->getAs(), E->arguments()); ++ ++ // We *can* call the block directly unless it is a function argument. ++ if (!isa(E->getCalleeDecl())) ++ Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); ++ else { ++ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2); ++ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); ++ } ++ } else { ++ // Bitcast the block literal to a generic block literal. ++ BlockPtr = Builder.CreatePointerCast( ++ BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal"); ++ // Get pointer to the block invoke function ++ llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3); + +- // And the rest of the arguments. +- EmitCallArgs(Args, FnType->getAs(), E->arguments()); ++ // First argument is a block literal casted to a void pointer ++ BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy); ++ Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy); ++ // And the rest of the arguments. ++ EmitCallArgs(Args, FnType->getAs(), E->arguments()); + +- // Load the function. +- llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); ++ // Load the function. ++ Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); ++ } + + const FunctionType *FuncTy = FnType->castAs(); + const CGFunctionInfo &FnInfo = +diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp +index 7f6f595..75003e5 100644 +--- a/lib/CodeGen/CGOpenCLRuntime.cpp ++++ b/lib/CodeGen/CGOpenCLRuntime.cpp +@@ -123,6 +123,23 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() { + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); + } + ++// Get the block literal from an expression derived from the block expression. ++// OpenCL v2.0 s6.12.5: ++// Block variable declarations are implicitly qualified with const. Therefore ++// all block variables must be initialized at declaration time and may not be ++// reassigned. ++static const BlockExpr *getBlockExpr(const Expr *E) { ++ const Expr *Prev = nullptr; // to make sure we do not stuck in infinite loop. ++ while(!isa(E) && E != Prev) { ++ Prev = E; ++ E = E->IgnoreCasts(); ++ if (auto DR = dyn_cast(E)) { ++ E = cast(DR->getDecl())->getInit(); ++ } ++ } ++ return cast(E); ++} ++ + /// Record emitted llvm invoke function and llvm block literal for the + /// corresponding block expression. + void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, +@@ -137,20 +154,17 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, + EnqueuedBlockMap[E].Kernel = nullptr; + } + ++llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) { ++ return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc; ++} ++ + CGOpenCLRuntime::EnqueuedBlockInfo + CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { + CGF.EmitScalarExpr(E); + + // The block literal may be assigned to a const variable. Chasing down + // to get the block literal. +- if (auto DR = dyn_cast(E)) { +- E = cast(DR->getDecl())->getInit(); +- } +- E = E->IgnoreImplicit(); +- if (auto Cast = dyn_cast(E)) { +- E = Cast->getSubExpr(); +- } +- auto *Block = cast(E); ++ const BlockExpr *Block = getBlockExpr(E); + + assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && + "Block expression not emitted"); +diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h +index 750721f..4effc7e 100644 +--- a/lib/CodeGen/CGOpenCLRuntime.h ++++ b/lib/CodeGen/CGOpenCLRuntime.h +@@ -92,6 +92,10 @@ public: + /// \param Block block literal emitted for the block expression. + void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, + llvm::Value *Block); ++ ++ /// \return LLVM block invoke function emitted for an expression derived from ++ /// the block expression. ++ llvm::Function *getInvokeFunction(const Expr *E); + }; + + } +diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl +index 19aacc3..ab5a2c6 100644 +--- a/test/CodeGenOpenCL/blocks.cl ++++ b/test/CodeGenOpenCL/blocks.cl +@@ -39,11 +39,8 @@ void foo(){ + // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* + // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]], + // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]] +- // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 + // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* +- // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]] +- // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)* +- // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]]) ++ // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]]) + // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2 + // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]] + // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3 +@@ -53,11 +50,8 @@ void foo(){ + // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic* + // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]], + // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]] +- // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2 + // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8* +- // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]] +- // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)* +- // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]]) ++ // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]]) + + int (^ block_B)(void) = ^{ + return i; +diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +index 8445016..1566912 100644 +--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl ++++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +@@ -312,9 +312,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { + }; + + // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. +- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) +- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* +- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) ++ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + block_A(); + + // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]]. +@@ -333,15 +331,35 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { + unsigned size = get_kernel_work_group_size(block_A); + + // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted. +- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) +- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* +- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) ++ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + block_A(); + ++ // Make sure that block invoke function is resolved correctly after sequence of assignements. ++ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* ++ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* ++ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) ++ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*), ++ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1, ++ bl_t b1 = block_G; ++ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* ++ // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* ++ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) ++ // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*), ++ // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2, ++ bl_t b2 = b1; ++ // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* ++ // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) ++ // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null) ++ b2(0); ++ // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]]. ++ // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl( ++ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), ++ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) ++ size = get_kernel_preferred_work_group_size_multiple(b2); ++ + void (^block_C)(void) = ^{ + callee(i, a); + }; +- + // Emits block literal on stack and block kernel [[INVLK3]]. + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke + // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue +@@ -404,8 +422,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { + // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}}) + // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}}) + // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}}) ++// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) + // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}}) + // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) +-// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) + // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}}) + // COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}}) +-- +1.8.3.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch new file mode 100644 index 00000000..510c7c6e --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0003-OpenCL-Fix-assertion-due-to-blocks.patch @@ -0,0 +1,61 @@ +From 29e2813a2ab7d5569860bb07892dfef7b5374d96 Mon Sep 17 00:00:00 2001 +From: Yaxun Liu +Date: Tue, 26 Feb 2019 16:20:41 +0000 +Subject: [PATCH] [OpenCL] Fix assertion due to blocks + +A recent change caused assertion in CodeGenFunction::EmitBlockCallExpr when a block is called. + +There is code + + Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); +getCalleeDecl calls Expr::getReferencedDeclOfCallee, which does not handle +BlockExpr and returns nullptr, which causes isa to assert. + +This patch fixes that. + +Differential Revision: https://reviews.llvm.org/D58658 + + +git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354893 91177308-0d34-0410-b5e6-96231b3b80d8 + +Upstream-Status: Backport +[https://github.com/llvm-mirror/clang/commit/29e2813a2ab7d5569860bb07892dfef7b5374d96] +Signed-off-by: Anuj Mittal +--- + lib/AST/Expr.cpp | 2 ++ + test/CodeGenOpenCL/blocks.cl | 6 ++++++ + 2 files changed, 8 insertions(+) + +diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp +index aef1eab..85690c7 100644 +--- a/lib/AST/Expr.cpp ++++ b/lib/AST/Expr.cpp +@@ -1358,6 +1358,8 @@ Decl *Expr::getReferencedDeclOfCallee() { + return DRE->getDecl(); + if (MemberExpr *ME = dyn_cast(CEE)) + return ME->getMemberDecl(); ++ if (auto *BE = dyn_cast(CEE)) ++ return BE->getBlockDecl(); + + return nullptr; + } +diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl +index ab5a2c6..c3e2685 100644 +--- a/test/CodeGenOpenCL/blocks.cl ++++ b/test/CodeGenOpenCL/blocks.cl +@@ -90,6 +90,12 @@ int get42() { + return blockArgFunc(^{return 42;}); + } + ++// COMMON-LABEL: define {{.*}}@call_block ++// call {{.*}}@__call_block_block_invoke ++int call_block() { ++ return ^int(int num) { return num; } (11); ++} ++ + // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size" + // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align" + +-- +1.8.3.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend new file mode 100644 index 00000000..f536f0f2 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend @@ -0,0 +1,16 @@ +FILESEXTRAPATHS_prepend_intel-x86-common := "${THISDIR}/files:" + +SRC_URI_append_intel-x86-common = " \ + git://github.com/intel/opencl-clang.git;protocol=https;branch=ocl-open-80;destsuffix=git/llvm/projects/opencl-clang;name=opencl-clang \ + git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_80;destsuffix=git/llvm/projects/llvm-spirv;name=spirv \ + file://0001-point-to-correct-clang.patch;patchdir=llvm/projects/opencl-clang \ + file://0001-OpenCL-Change-type-of-block-pointer-for-OpenCL.patch;patchdir=clang \ + file://0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch;patchdir=clang \ + file://0003-OpenCL-Fix-assertion-due-to-blocks.patch;patchdir=clang \ + file://0001-dont-export-targets-for-binaries.patch \ + file://0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch;patchdir=llvm/projects/llvm-spirv \ + " + +SRCREV_opencl-clang = "daf5e4dd718477ae8cf89a283c653939d9182f15" +SRCREV_spirv = "bd0f28fb92061d49c0f120b4dac3fd8956006745" + -- cgit v1.2.3-54-g00ecf