summaryrefslogtreecommitdiffstats
path: root/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
diff options
context:
space:
mode:
Diffstat (limited to 'dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch')
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch1119
1 files changed, 1119 insertions, 0 deletions
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
new file mode 100644
index 00000000..4a528674
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch
@@ -0,0 +1,1119 @@
1From 9ce0fe02fd6cda5fb29fbb0d5037a1798a810b8a Mon Sep 17 00:00:00 2001
2From: Alexey Sotkin <alexey.sotkin@intel.com>
3Date: Thu, 21 Feb 2019 17:14:36 +0300
4Subject: [PATCH 1/3] Update LowerOpenCL pass to handle new blocks
5 represntation in LLVM IR
6
7Upstream-Status: Backport
8[https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/bd6ddfaf7232cd81c7f2fe9877e66f286731bd8e]
9Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
10---
11 lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 413 ++++++++----------------------
12 test/global_block.ll | 71 ++---
13 test/literal-struct.ll | 31 ++-
14 test/transcoding/block_w_struct_return.ll | 47 ++--
15 test/transcoding/enqueue_kernel.ll | 237 ++++++++++-------
16 5 files changed, 317 insertions(+), 482 deletions(-)
17
18diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
19index 50e1838..b42a4ec 100644
20--- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
21+++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
22@@ -1,303 +1,110 @@
23-//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===//
24-//
25-// The LLVM/SPIRV Translator
26-//
27-// This file is distributed under the University of Illinois Open Source
28-// License. See LICENSE.TXT for details.
29-//
30-// Copyright (c) 2018 Intel Corporation. All rights reserved.
31-//
32-// Permission is hereby granted, free of charge, to any person obtaining a
33-// copy of this software and associated documentation files (the "Software"),
34-// to deal with the Software without restriction, including without limitation
35-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
36-// and/or sell copies of the Software, and to permit persons to whom the
37-// Software is furnished to do so, subject to the following conditions:
38-//
39-// Redistributions of source code must retain the above copyright notice,
40-// this list of conditions and the following disclaimers.
41-// Redistributions in binary form must reproduce the above copyright notice,
42-// this list of conditions and the following disclaimers in the documentation
43-// and/or other materials provided with the distribution.
44-// Neither the names of Intel Corporation, nor the names of its
45-// contributors may be used to endorse or promote products derived from this
46-// Software without specific prior written permission.
47-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
48-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
49-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
50-// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
51-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
52-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
53-// THE SOFTWARE.
54-//
55-//===----------------------------------------------------------------------===//
56-//
57-// SPIR-V specification doesn't allow function pointers, so SPIR-V translator
58-// is designed to fail if a value with function type (except calls) is occured.
59-// Currently there is only two cases, when function pointers are generating in
60-// LLVM IR in OpenCL - block calls and device side enqueue built-in calls.
61-//
62-// In both cases values with function type used as intermediate representation
63-// for block literal structure.
64-//
65-// This pass is designed to find such cases and simplify them to avoid any
66-// function pointer types occurrences in LLVM IR in 4 steps.
67-//
68-// 1. Find all function pointer allocas, like
69-// %block = alloca void () *
70-//
71-// Then find a single store to that alloca:
72-// %blockLit = alloca <{ i32, i32, ...}>, align 4
73-// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()*
74-// > store void ()* %0, void ()** %block, align 4
75-//
76-// And replace the alloca users by new instructions which used stored value
77-// %blockLit itself instead of function pointer alloca %block.
78-//
79-// 2. Find consecutive casts from block literal type to i8 addrspace(4)*
80-// used function pointers as an intermediate type:
81-// %0 = bitcast <{ i32, i32 }> %block to void() *
82-// %1 = addrspacecast void() * %0 to i8 addrspace(4)*
83-// And simplify them:
84-// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)*
85-//
86-// 3. Find all unused instructions with function pointer type occured after
87-// pp.1-2 and remove them.
88-//
89-// 4. Find unused globals with function pointer type, like
90-// @block = constant void ()*
91-// bitcast ({ i32, i32 }* @__block_literal_global to void ()*
92-//
93-// And remove them.
94-//
95-//===----------------------------------------------------------------------===//
96-#define DEBUG_TYPE "spv-lower-ocl-blocks"
97-
98-#include "OCLUtil.h"
99-#include "SPIRVInternal.h"
100-
101-#include "llvm/ADT/SetVector.h"
102-#include "llvm/Analysis/ValueTracking.h"
103-#include "llvm/IR/GlobalVariable.h"
104-#include "llvm/IR/InstIterator.h"
105-#include "llvm/IR/Module.h"
106-#include "llvm/Pass.h"
107-#include "llvm/PassSupport.h"
108-#include "llvm/Support/Casting.h"
109-
110-using namespace llvm;
111-
112-namespace {
113-
114-static void
115-removeUnusedFunctionPtrInst(Instruction *I,
116- SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
117- for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) {
118- Instruction *OpI = dyn_cast<Instruction>(I->getOperand(OpIdx));
119- I->setOperand(OpIdx, nullptr);
120- if (OpI && OpI != I && OpI->user_empty())
121- FuncPtrInsts.insert(OpI);
122- }
123- I->eraseFromParent();
124-}
125-
126-static bool isFuncPtrAlloca(const AllocaInst *AI) {
127- auto *ET = dyn_cast<PointerType>(AI->getAllocatedType());
128- return ET && ET->getElementType()->isFunctionTy();
129-}
130-
131-static bool hasFuncPtrType(const Value *V) {
132- auto *PT = dyn_cast<PointerType>(V->getType());
133- return PT && PT->getElementType()->isFunctionTy();
134-}
135-
136-static bool isFuncPtrInst(const Instruction *I) {
137- if (auto *AI = dyn_cast<AllocaInst>(I))
138- return isFuncPtrAlloca(AI);
139-
140- for (auto &Op : I->operands()) {
141- if (auto *AI = dyn_cast<AllocaInst>(Op))
142- return isFuncPtrAlloca(AI);
143-
144- auto *OpI = dyn_cast<Instruction>(&Op);
145- if (OpI && OpI != I && hasFuncPtrType(OpI))
146- return true;
147- }
148- return false;
149-}
150-
151-static StoreInst *findSingleStore(AllocaInst *AI) {
152- StoreInst *Store = nullptr;
153- for (auto *U : AI->users()) {
154- if (!isa<StoreInst>(U))
155- continue; // not a store
156- if (Store)
157- return nullptr; // there are more than one stores
158- Store = dyn_cast<StoreInst>(U);
159- }
160- return Store;
161-}
162-
163-static void fixFunctionPtrAllocaUsers(AllocaInst *AI) {
164- // Find and remove a single store to alloca
165- auto *SingleStore = findSingleStore(AI);
166- assert(SingleStore && "More than one store to the function pointer alloca");
167- auto *StoredVal = SingleStore->getValueOperand();
168- SingleStore->eraseFromParent();
169-
170- // Find loads from the alloca and replace thier users
171- for (auto *U : AI->users()) {
172- auto *LI = dyn_cast<LoadInst>(U);
173- if (!LI)
174- continue;
175-
176- for (auto *U : LI->users()) {
177- auto *UInst = cast<Instruction>(U);
178- auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast(
179- StoredVal, UInst->getType(), "", UInst);
180- UInst->replaceAllUsesWith(Cast);
181- }
182- }
183-}
184-
185-static int getBlockLiteralIdx(const Function &F) {
186- StringRef FName = F.getName();
187- if (isEnqueueKernelBI(FName))
188- return FName.contains("events") ? 7 : 4;
189- if (isKernelQueryBI(FName))
190- return FName.contains("for_ndrange") ? 2 : 1;
191- if (FName.startswith("__") && FName.contains("_block_invoke"))
192- return F.hasStructRetAttr() ? 1 : 0;
193-
194- return -1; // No block literal argument
195-}
196-
197-static bool hasBlockLiteralArg(const Function &F) {
198- return getBlockLiteralIdx(F) != -1;
199-}
200-
201-static bool simplifyFunctionPtrCasts(Function &F) {
202- bool Changed = false;
203- int BlockLiteralIdx = getBlockLiteralIdx(F);
204- for (auto *U : F.users()) {
205- auto *Call = dyn_cast<CallInst>(U);
206- if (!Call)
207- continue;
208- if (Call->getFunction()->getName() == F.getName().str() + "_kernel")
209- continue; // Skip block invoke function calls inside block invoke kernels
210-
211- const DataLayout &DL = F.getParent()->getDataLayout();
212- auto *BlockLiteral = Call->getOperand(BlockLiteralIdx);
213- auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL);
214- if (isa<GlobalVariable>(BlockLiteralVal))
215- continue; // nothing to do with globals
216-
217- auto *BlockLiteralAlloca = cast<AllocaInst>(BlockLiteralVal);
218- assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() &&
219- "Function type shouldn't be there");
220-
221- auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast(
222- BlockLiteralAlloca, BlockLiteral->getType(), "", Call);
223- BlockLiteral->replaceAllUsesWith(NewBlockLiteral);
224- Changed |= true;
225- }
226- return Changed;
227-}
228-
229-static void
230-findFunctionPtrAllocas(Module &M,
231- SmallVectorImpl<AllocaInst *> &FuncPtrAllocas) {
232- for (auto &F : M) {
233- if (F.isDeclaration())
234- continue;
235- for (auto &I : instructions(F)) {
236- auto *AI = dyn_cast<AllocaInst>(&I);
237- if (!AI || !isFuncPtrAlloca(AI))
238- continue;
239- FuncPtrAllocas.push_back(AI);
240- }
241- }
242-}
243-
244-static void
245-findUnusedFunctionPtrInsts(Module &M,
246- SmallSetVector<Instruction *, 16> &FuncPtrInsts) {
247- for (auto &F : M) {
248- if (F.isDeclaration())
249- continue;
250- for (auto &I : instructions(F))
251- if (I.user_empty() && isFuncPtrInst(&I))
252- FuncPtrInsts.insert(&I);
253- }
254-}
255-
256-static void
257-findUnusedFunctionPtrGlbs(Module &M,
258- SmallVectorImpl<GlobalVariable *> &FuncPtrGlbs) {
259- for (auto &GV : M.globals()) {
260- if (!GV.user_empty())
261- continue;
262- auto *GVType = dyn_cast<PointerType>(GV.getType()->getElementType());
263- if (GVType && GVType->getElementType()->isFunctionTy())
264- FuncPtrGlbs.push_back(&GV);
265- }
266-}
267-
268-class SPIRVLowerOCLBlocks : public ModulePass {
269-
270-public:
271- SPIRVLowerOCLBlocks() : ModulePass(ID) {}
272-
273- bool runOnModule(Module &M) {
274- bool Changed = false;
275-
276- // 1. Find function pointer allocas and fix their users
277- SmallVector<AllocaInst *, 16> FuncPtrAllocas;
278- findFunctionPtrAllocas(M, FuncPtrAllocas);
279-
280- Changed |= !FuncPtrAllocas.empty();
281- for (auto *AI : FuncPtrAllocas)
282- fixFunctionPtrAllocaUsers(AI);
283-
284- // 2. Simplify consecutive casts which use function pointer types
285- for (auto &F : M)
286- if (hasBlockLiteralArg(F))
287- Changed |= simplifyFunctionPtrCasts(F);
288-
289- // 3. Cleanup unused instructions with function pointer type
290- // which are occured after pp. 1-2
291- SmallSetVector<Instruction *, 16> FuncPtrInsts;
292- findUnusedFunctionPtrInsts(M, FuncPtrInsts);
293-
294- Changed |= !FuncPtrInsts.empty();
295- while (!FuncPtrInsts.empty()) {
296- Instruction *I = FuncPtrInsts.pop_back_val();
297- removeUnusedFunctionPtrInst(I, FuncPtrInsts);
298- }
299-
300- // 4. Find and remove unused global variables with function pointer type
301- SmallVector<GlobalVariable *, 16> FuncPtrGlbs;
302- findUnusedFunctionPtrGlbs(M, FuncPtrGlbs);
303-
304- Changed |= !FuncPtrGlbs.empty();
305- for (auto *GV : FuncPtrGlbs)
306- GV->eraseFromParent();
307-
308- return Changed;
309- }
310-
311- static char ID;
312-}; // class SPIRVLowerOCLBlocks
313-
314-char SPIRVLowerOCLBlocks::ID = 0;
315-
316-} // namespace
317-
318-INITIALIZE_PASS(
319- SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks",
320- "Remove function pointers occured in case of using OpenCL blocks", false,
321- false)
322-
323-llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() {
324- return new SPIRVLowerOCLBlocks();
325-}
326+//===- SPIRVLowerOCLBlocks.cpp - OCL Utilities ----------------------------===//
327+//
328+// The LLVM/SPIRV Translator
329+//
330+// This file is distributed under the University of Illinois Open Source
331+// License. See LICENSE.TXT for details.
332+//
333+// Copyright (c) 2018 Intel Corporation. All rights reserved.
334+//
335+// Permission is hereby granted, free of charge, to any person obtaining a
336+// copy of this software and associated documentation files (the "Software"),
337+// to deal with the Software without restriction, including without limitation
338+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
339+// and/or sell copies of the Software, and to permit persons to whom the
340+// Software is furnished to do so, subject to the following conditions:
341+//
342+// Redistributions of source code must retain the above copyright notice,
343+// this list of conditions and the following disclaimers.
344+// Redistributions in binary form must reproduce the above copyright notice,
345+// this list of conditions and the following disclaimers in the documentation
346+// and/or other materials provided with the distribution.
347+// Neither the names of Intel Corporation, nor the names of its
348+// contributors may be used to endorse or promote products derived from this
349+// Software without specific prior written permission.
350+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
351+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
352+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
353+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
354+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
355+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
356+// THE SOFTWARE.
357+//
358+//===----------------------------------------------------------------------===//
359+//
360+// SPIR-V specification doesn't allow function pointers, so SPIR-V translator
361+// is designed to fail if a value with function type (except calls) is occured.
362+// Currently there is only two cases, when function pointers are generating in
363+// LLVM IR in OpenCL - block calls and device side enqueue built-in calls.
364+//
365+// In both cases values with function type used as intermediate representation
366+// for block literal structure.
367+//
368+// In LLVM IR produced by clang, blocks are represented with the following
369+// structure:
370+// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
371+// Pointers to block invoke functions are stored in the third field. Clang
372+// replaces inderect function calls in all cases except if block is passed as a
373+// function argument. Note that it is somewhat unclear if the OpenCL C spec
374+// should allow passing blocks as function argumernts. This pass is not supposed
375+// to work correctly with such functions.
376+// Clang though has to store function pointers to this structure. Purpose of
377+// this pass is to replace store of function pointers(not allowed in SPIR-V)
378+// with null pointers.
379+//
380+//===----------------------------------------------------------------------===//
381+#define DEBUG_TYPE "spv-lower-ocl-blocks"
382+
383+#include "SPIRVInternal.h"
384+
385+#include "llvm/IR/Module.h"
386+#include "llvm/Pass.h"
387+#include "llvm/Support/Regex.h"
388+
389+using namespace llvm;
390+
391+namespace {
392+
393+static bool isBlockInvoke(Function &F) {
394+ static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$");
395+ return BlockInvokeRegex.match(F.getName());
396+}
397+
398+class SPIRVLowerOCLBlocks : public ModulePass {
399+
400+public:
401+ SPIRVLowerOCLBlocks() : ModulePass(ID) {}
402+
403+ bool runOnModule(Module &M) {
404+ bool Changed = false;
405+ for (Function &F : M) {
406+ if (!isBlockInvoke(F))
407+ continue;
408+ for (User *U : F.users()) {
409+ if (!isa<Constant>(U))
410+ continue;
411+ Constant *Null = Constant::getNullValue(U->getType());
412+ if (U != Null) {
413+ U->replaceAllUsesWith(Null);
414+ Changed = true;
415+ }
416+ }
417+ }
418+ return Changed;
419+ }
420+
421+ static char ID;
422+};
423+
424+char SPIRVLowerOCLBlocks::ID = 0;
425+
426+} // namespace
427+
428+INITIALIZE_PASS(
429+ SPIRVLowerOCLBlocks, "spv-lower-ocl-blocks",
430+ "Remove function pointers occured in case of using OpenCL blocks", false,
431+ false)
432+
433+llvm::ModulePass *llvm::createSPIRVLowerOCLBlocks() {
434+ return new SPIRVLowerOCLBlocks();
435+}
436diff --git a/test/global_block.ll b/test/global_block.ll
437index a9267d8..efb4cf3 100644
438--- a/test/global_block.ll
439+++ b/test/global_block.ll
440@@ -16,7 +16,7 @@
441 ; RUN: llvm-spirv %t.bc -o %t.spv
442 ; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM
443
444-target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
445+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
446 target triple = "spir-unknown-unknown"
447
448 ; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke"
449@@ -26,71 +26,56 @@ target triple = "spir-unknown-unknown"
450 ; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]]
451 ; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] [[int]]
452
453-;; This variable is not needed in SPIRV
454-; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1
455-; CHECK-LLVM-NOT: @block_kernel.b1
456-@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) addrspace(4)*), align 8
457+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
458
459-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
460+@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4
461+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4
462
463-; Function Attrs: convergent nounwind
464-define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
465+; Function Attrs: convergent noinline nounwind optnone
466+define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
467 entry:
468- %res.addr = alloca i32 addrspace(1)*, align 8
469- store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa !10
470-
471+ %res.addr = alloca i32 addrspace(1)*, align 4
472+ store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4
473 ; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} [[five]]
474 ; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 5)
475- %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
476-
477- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa !10
478- store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14
479+ %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2
480+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4
481+ store i32 %call, i32 addrspace(1)* %0, align 4
482 ret void
483 }
484
485-; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]]
486+; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]]
487 ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}}
488 ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}}
489 ; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 %{{.*}})
490-; Function Attrs: convergent nounwind
491+; Function Attrs: convergent noinline nounwind optnone
492 define internal spir_func i32 @_block_invoke(i8 addrspace(4)* %.block_descriptor, i32 %i) #1 {
493 entry:
494- %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
495+ %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
496 %i.addr = alloca i32, align 4
497- store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8
498- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
499- store i32 %i, i32* %i.addr, align 4, !tbaa !14
500- %0 = load i32, i32* %i.addr, align 4, !tbaa !14
501+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
502+ store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
503+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
504+ store i32 %i, i32* %i.addr, align 4
505+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
506+ %0 = load i32, i32* %i.addr, align 4
507 %add = add nsw i32 %0, 1
508 ret i32 %add
509 }
510
511-attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
512-attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
513+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
514+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
515 attributes #2 = { convergent }
516
517 !llvm.module.flags = !{!0}
518-!opencl.enable.FP_CONTRACT = !{}
519 !opencl.ocl.version = !{!1}
520 !opencl.spir.version = !{!1}
521-!opencl.used.extensions = !{!2}
522-!opencl.used.optional.core.features = !{!2}
523-!opencl.compiler.options = !{!2}
524-!llvm.ident = !{!3}
525+!llvm.ident = !{!2}
526
527 !0 = !{i32 1, !"wchar_size", i32 4}
528 !1 = !{i32 2, i32 0}
529-!2 = !{}
530-!3 = !{!"clang version 7.0.0"}
531-!4 = !{i32 1}
532-!5 = !{!"none"}
533-!6 = !{!"int*"}
534-!7 = !{!""}
535-!8 = !{i1 false}
536-!9 = !{i32 0}
537-!10 = !{!11, !11, i64 0}
538-!11 = !{!"any pointer", !12, i64 0}
539-!12 = !{!"omnipotent char", !13, i64 0}
540-!13 = !{!"Simple C/C++ TBAA"}
541-!14 = !{!15, !15, i64 0}
542-!15 = !{!"int", !12, i64 0}
543+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
544+!3 = !{i32 1}
545+!4 = !{!"none"}
546+!5 = !{!"int*"}
547+!6 = !{!""}
548diff --git a/test/literal-struct.ll b/test/literal-struct.ll
549index c52170a..52a731a 100644
550--- a/test/literal-struct.ll
551+++ b/test/literal-struct.ll
552@@ -2,7 +2,7 @@
553 ; structs, i.e. structs whose type has no name. Typicaly clang generate such
554 ; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with
555 ; the following command:
556-; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll
557+; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll
558
559 ; literal-struct.cl:
560 ; void foo()
561@@ -14,25 +14,28 @@
562 ; RUN: llvm-as < %s | llvm-spirv -spirv-text -o %t
563 ; RUN: FileCheck < %t %s
564
565-; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0
566-; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}}
567+; CHECK: TypeInt [[Int:[0-9]+]] 32 0
568+; CHECK: TypeInt [[Int8:[0-9]+]] 8 0
569+; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]]
570+; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]]
571
572 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
573 target triple = "spir"
574
575-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
576+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
577+
578+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4
579 ; CHECK: ConstantComposite [[StructType]]
580
581-; This is artificial case is added to cover ConstantNull instrucitions with TypeStruct.
582-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } zeroinitializer, align 4
583+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4
584 ; CHECK: ConstantNull [[StructType]]
585
586 ; Function Attrs: convergent noinline nounwind optnone
587 define spir_func void @foo() #0 {
588 entry:
589- %myBlock = alloca void () addrspace(4)*, align 4
590- store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4
591- call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
592+ %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4
593+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4
594+ call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1
595 ret void
596 }
597
598@@ -40,14 +43,14 @@ entry:
599 define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) #0 {
600 entry:
601 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
602- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
603+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
604 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
605- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
606- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
607+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
608+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
609 ret void
610 }
611
612-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
613+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
614 attributes #1 = { convergent }
615
616 !llvm.module.flags = !{!0}
617@@ -57,4 +60,4 @@ attributes #1 = { convergent }
618
619 !0 = !{i32 1, !"wchar_size", i32 4}
620 !1 = !{i32 2, i32 0}
621-!2 = !{!"clang version 8.0.0 "}
622+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
623diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll
624index 76e29f0..df89b13 100644
625--- a/test/transcoding/block_w_struct_return.ll
626+++ b/test/transcoding/block_w_struct_return.ll
627@@ -16,6 +16,8 @@
628 ; res[tid] = kernelBlock(aa).a - 6;
629 ; }
630
631+; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll
632+
633 ; RUN: llvm-as %s -o %t.bc
634 ; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt
635 ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV
636@@ -27,12 +29,14 @@
637 ; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke"
638
639 ; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32
640+; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8
641+; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]]
642 ; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]]
643 ; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]]
644
645 ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7
646 ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7
647-; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}}
648+; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}}
649 ; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] [[StructRet]] [[BlockLit]] [[StructArg]]
650
651 ; CHECK-LLVM: %[[StructA:.*]] = type { i32 }
652@@ -41,20 +45,21 @@
653 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
654 target triple = "spir64-unknown-unknown"
655
656+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
657 %struct.A = type { i32 }
658
659-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
660+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8
661
662 ; Function Attrs: convergent noinline nounwind optnone
663-define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible !8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location !7 {
664+define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
665 entry:
666 %res.addr = alloca i32 addrspace(1)*, align 8
667- %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8
668+ %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 8
669 %tid = alloca i64, align 8
670 %aa = alloca %struct.A, align 4
671 %tmp = alloca %struct.A, align 4
672 store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8
673- store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void (%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void (%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) addrspace(4)** %kernelBlock, align 8
674+ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8
675 %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4
676 store i64 %call, i64* %tid, align 8
677 %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8
678@@ -63,7 +68,7 @@ entry:
679 store i32 -1, i32 addrspace(1)* %arrayidx, align 4
680 %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0
681 store i32 5, i32* %a, align 4
682- call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
683+ call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5
684 %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0
685 %2 = load i32, i32* %a1, align 4
686 %sub = sub nsw i32 %2, 6
687@@ -78,10 +83,10 @@ entry:
688 define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval align 4 %a) #1 {
689 entry:
690 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8
691- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8
692+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8
693 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8
694- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
695- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 8
696+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
697+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8
698 %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0
699 store i32 6, i32* %a1, align 4
700 %0 = bitcast %struct.A* %agg.result to i8*
701@@ -96,30 +101,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r
702 ; Function Attrs: convergent nounwind readnone
703 declare spir_func i64 @_Z13get_global_idj(i32) #3
704
705-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
706-attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
707+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
708+attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
709 attributes #2 = { argmemonly nounwind }
710 attributes #3 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
711 attributes #4 = { convergent nounwind readnone }
712 attributes #5 = { convergent }
713
714 !llvm.module.flags = !{!0}
715-!opencl.enable.FP_CONTRACT = !{}
716 !opencl.ocl.version = !{!1}
717 !opencl.spir.version = !{!1}
718-!opencl.used.extensions = !{!2}
719-!opencl.used.optional.core.features = !{!2}
720-!opencl.compiler.options = !{!2}
721-!llvm.ident = !{!3}
722+!llvm.ident = !{!2}
723
724 !0 = !{i32 1, !"wchar_size", i32 4}
725 !1 = !{i32 2, i32 0}
726-!2 = !{}
727-!3 = !{!"clang version 7.0.0"}
728-!4 = !{i32 1}
729-!5 = !{!"none"}
730-!6 = !{!"int*"}
731-!7 = !{!""}
732-!8 = !{i1 false}
733-!9 = !{i32 0}
734-
735+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
736+!3 = !{i32 1}
737+!4 = !{!"none"}
738+!5 = !{!"int*"}
739+!6 = !{!""}
740diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll
741index 0d29c71..435871d 100644
742--- a/test/transcoding/enqueue_kernel.ll
743+++ b/test/transcoding/enqueue_kernel.ll
744@@ -51,11 +51,12 @@
745 ; ModuleID = 'enqueue_kernel.cl'
746 source_filename = "enqueue_kernel.cl"
747 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
748-target triple = "spir-unknown-unknown"
749+target triple = "spir"
750
751 %opencl.queue_t = type opaque
752 %struct.ndrange_t = type { i32 }
753 %opencl.clk_event_t = type opaque
754+%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
755
756 ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel"
757 ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel"
758@@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown"
759
760 ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32
761 ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8
762-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
763 ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0
764-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17
765+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21
766 ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2
767-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20
768-; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
769+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8
770+; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24
771
772 ; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}}
773+; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
774+; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]]
775 ; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]]
776 ; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]]
777-; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]]
778 ; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]]
779 ; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
780 ; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
781 ; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]]
782 ; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]]
783
784-; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32 }
785-; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }>
786-; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
787-; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32 }>
788+; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32, i8 addrspace(4)* }
789+; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>
790+; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
791+; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32, i8 addrspace(4)* }>
792
793-; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
794-; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
795+; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4
796+; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4
797
798-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
799-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4
800+@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4
801+@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4
802
803 ; Function Attrs: convergent noinline nounwind optnone
804-define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
805+define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
806 entry:
807+ %a.addr = alloca i32 addrspace(1)*, align 4
808+ %b.addr = alloca i32 addrspace(1)*, align 4
809+ %i.addr = alloca i32, align 4
810+ %c0.addr = alloca i8, align 1
811 %default_queue = alloca %opencl.queue_t*, align 4
812 %flags = alloca i32, align 4
813 %ndrange = alloca %struct.ndrange_t, align 4
814 %clk_event = alloca %opencl.clk_event_t*, align 4
815 %event_wait_list = alloca %opencl.clk_event_t*, align 4
816 %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4
817- %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4
818- %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
819+ %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4
820+ %tmp = alloca %struct.ndrange_t, align 4
821+ %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
822+ %tmp4 = alloca %struct.ndrange_t, align 4
823 %c = alloca i8, align 1
824+ %tmp11 = alloca %struct.ndrange_t, align 4
825+ %block_sizes = alloca [1 x i32], align 4
826+ %tmp12 = alloca %struct.ndrange_t, align 4
827+ %block_sizes13 = alloca [3 x i32], align 4
828+ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
829+ store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4
830+ store i32 %i, i32* %i.addr, align 4
831+ store i8 %c0, i8* %c0.addr, align 1
832 store i32 0, i32* %flags, align 4
833 %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
834 %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4
835 store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4
836 %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
837 %2 = load i32, i32* %flags, align 4
838- %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
839- store i32 17, i32* %block.size, align 4
840- %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
841+ %3 = bitcast %struct.ndrange_t* %tmp to i8*
842+ %4 = bitcast %struct.ndrange_t* %ndrange to i8*
843+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false)
844+ %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
845+ store i32 21, i32* %block.size, align 4
846+ %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
847 store i32 4, i32* %block.align, align 4
848- %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
849- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4
850- %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
851- store i32 %i, i32* %block.captured1, align 4
852- %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
853- store i8 %c0, i8* %block.captured2, align 4
854- %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()*
855- %4 = addrspacecast void ()* %3 to i8 addrspace(4)*
856+ %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
857+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4
858+ %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
859+ %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
860+ store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4
861+ %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
862+ %6 = load i32, i32* %i.addr, align 4
863+ store i32 %6, i32* %block.captured1, align 4
864+ %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5
865+ %7 = load i8, i8* %c0.addr, align 1
866+ store i8 %7, i8* %block.captured2, align 4
867+ %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic*
868+ %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)*
869
870 ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]]
871 ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}}
872 ; [[ConstInt0]] [[EventNull]] [[EventNull]]
873 ; [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]]
874
875-; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 addrspace(4)*
876+; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic*
877+; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)*
878 ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)*
879-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]])
880-
881- %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4)
882- %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
883- %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
884- %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
885- store i32 20, i32* %block.size5, align 4
886- %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
887+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]])
888+
889+ %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
890+ %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
891+ %12 = load i32, i32* %flags, align 4
892+ %13 = bitcast %struct.ndrange_t* %tmp4 to i8*
893+ %14 = bitcast %struct.ndrange_t* %ndrange to i8*
894+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false)
895+ %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
896+ %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
897+ %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0
898+ store i32 24, i32* %block.size5, align 4
899+ %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1
900 store i32 4, i32* %block.align6, align 4
901- %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
902- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4
903- %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
904- store i32 %i, i32* %block.captured8, align 4
905- %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
906- store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4
907- %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to void ()*
908- %9 = addrspacecast void ()* %8 to i8 addrspace(4)*
909+ %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2
910+ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4
911+ %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3
912+ %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
913+ store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4
914+ %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4
915+ %18 = load i32, i32* %i.addr, align 4
916+ store i32 %18, i32* %block.captured9, align 4
917+ %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5
918+ %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4
919+ store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4
920+ %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic*
921+ %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)*
922+
923
924 ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]]
925 ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]]
926@@ -158,16 +193,24 @@ entry:
927 ; [[ConstInt2]] [[Event1]] [[Event2]]
928 ; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]]
929
930-; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 addrspace(4)*
931+; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to %struct.__opencl_block_literal_generic*
932+; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4)
933 ; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)*
934-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]])
935-
936- %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
937- %11 = alloca [1 x i32]
938- %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0
939- %13 = load i8, i8* %c, align 1
940- %14 = zext i8 %13 to i32
941- store i32 %14, i32* %12, align 4
942+; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]])
943+
944+ %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21)
945+ %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
946+ %24 = load i32, i32* %flags, align 4
947+ %25 = bitcast %struct.ndrange_t* %tmp11 to i8*
948+ %26 = bitcast %struct.ndrange_t* %ndrange to i8*
949+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false)
950+ %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
951+ %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)*
952+ %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
953+ %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0
954+ %30 = load i8, i8* %c, align 1
955+ %31 = zext i8 %30 to i32
956+ store i32 %31, i32* %29, align 4
957
958 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]]
959 ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]]
960@@ -182,14 +225,18 @@ entry:
961 ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)*
962 ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}})
963
964- %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %12)
965- %16 = alloca [3 x i32]
966- %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0
967- store i32 1, i32* %17, align 4
968- %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1
969- store i32 2, i32* %18, align 4
970- %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2
971- store i32 4, i32* %19, align 4
972+ %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29)
973+ %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
974+ %34 = load i32, i32* %flags, align 4
975+ %35 = bitcast %struct.ndrange_t* %tmp12 to i8*
976+ %36 = bitcast %struct.ndrange_t* %ndrange to i8*
977+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false)
978+ %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0
979+ store i32 1, i32* %37, align 4
980+ %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1
981+ store i32 2, i32* %38, align 4
982+ %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2
983+ store i32 4, i32* %39, align 4
984
985 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]]
986 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]]
987@@ -206,24 +253,27 @@ entry:
988 ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)*
989 ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}})
990
991- %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17)
992+ %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37)
993 ret void
994 }
995
996+; Function Attrs: argmemonly nounwind
997+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1
998+
999 ; Function Attrs: convergent noinline nounwind optnone
1000 define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #2 {
1001 entry:
1002 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
1003- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
1004+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
1005 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1006- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
1007- store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
1008- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
1009+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
1010+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
1011+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5
1012 %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4
1013 %conv = sext i8 %0 to i32
1014- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 2
1015+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
1016 %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4
1017- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
1018+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
1019 %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4
1020 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2
1021 store i32 %conv, i32 addrspace(1)* %arrayidx, align 4
1022@@ -243,19 +293,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i
1023 define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 {
1024 entry:
1025 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
1026- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
1027+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
1028 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1029- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
1030- store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
1031- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
1032+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
1033+ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
1034+ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5
1035 %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4
1036- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
1037+ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
1038 %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4
1039 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1
1040 %2 = load i32, i32 addrspace(1)* %arrayidx, align 4
1041- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2
1042+ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
1043 %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4
1044- %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
1045+ %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
1046 %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4
1047 %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4
1048 store i32 %2, i32 addrspace(1)* %arrayidx4, align 4
1049@@ -276,11 +326,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac
1050 entry:
1051 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
1052 %p.addr = alloca i8 addrspace(3)*, align 4
1053- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
1054+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
1055 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1056- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
1057+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
1058 store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4
1059- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
1060+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
1061 ret void
1062 }
1063
1064@@ -300,13 +350,13 @@ entry:
1065 %p1.addr = alloca i8 addrspace(3)*, align 4
1066 %p2.addr = alloca i8 addrspace(3)*, align 4
1067 %p3.addr = alloca i8 addrspace(3)*, align 4
1068- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4
1069+ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
1070 store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
1071- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)*
1072+ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
1073 store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4
1074 store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4
1075 store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4
1076- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4
1077+ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
1078 ret void
1079 }
1080
1081@@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*,
1082 ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*)
1083 ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)
1084
1085-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
1086+attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
1087 attributes #1 = { argmemonly nounwind }
1088-attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
1089+attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
1090 attributes #3 = { nounwind }
1091
1092 !llvm.module.flags = !{!0}
1093-!opencl.enable.FP_CONTRACT = !{}
1094 !opencl.ocl.version = !{!1}
1095 !opencl.spir.version = !{!1}
1096-!opencl.used.extensions = !{!2}
1097-!opencl.used.optional.core.features = !{!2}
1098-!opencl.compiler.options = !{!2}
1099-!llvm.ident = !{!3}
1100+!llvm.ident = !{!2}
1101
1102 !0 = !{i32 1, !"wchar_size", i32 4}
1103 !1 = !{i32 2, i32 0}
1104-!2 = !{}
1105-!3 = !{!"clang version 7.0.0"}
1106-!4 = !{i32 1, i32 1, i32 0, i32 0}
1107-!5 = !{!"none", !"none", !"none", !"none"}
1108-!6 = !{!"int*", !"int*", !"int", !"char"}
1109-!7 = !{!"", !"", !"", !""}
1110-!8 = !{i1 false, i1 false, i1 false, i1 false}
1111-!9 = !{i32 0, i32 0, i32 0, i32 0}
1112+!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"}
1113+!3 = !{i32 1, i32 1, i32 0, i32 0}
1114+!4 = !{!"none", !"none", !"none", !"none"}
1115+!5 = !{!"int*", !"int*", !"int", !"char"}
1116+!6 = !{!"", !"", !"", !""}
1117--
11181.8.3.1
1119