diff options
Diffstat (limited to 'dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch')
-rw-r--r-- | dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch | 986 |
1 files changed, 0 insertions, 986 deletions
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch deleted file mode 100644 index 2037421b..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0001-Update-LowerOpenCL-pass-to-handle-new-blocks-represn.patch +++ /dev/null | |||
@@ -1,986 +0,0 @@ | |||
1 | From 177cce531fd3665bb964a03db51890e0241e3e72 Mon Sep 17 00:00:00 2001 | ||
2 | From: Alexey Sotkin <alexey.sotkin@intel.com> | ||
3 | Date: Thu, 21 Feb 2019 17:14:36 +0300 | ||
4 | Subject: [PATCH] Update LowerOpenCL pass to handle new blocks represntation in | ||
5 | LLVM IR | ||
6 | |||
7 | Upstream-Status: Backport [https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/bd6ddfaf7232cd81c7f2fe9877e66f286731bd8e] | ||
8 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
9 | |||
10 | --- | ||
11 | lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 249 ++++-------------------------- | ||
12 | test/global_block.ll | 71 ++++----- | ||
13 | test/literal-struct.ll | 31 ++-- | ||
14 | test/transcoding/block_w_struct_return.ll | 47 +++--- | ||
15 | test/transcoding/enqueue_kernel.ll | 237 ++++++++++++++++------------ | ||
16 | 5 files changed, 235 insertions(+), 400 deletions(-) | ||
17 | |||
18 | diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp | ||
19 | index c80bf04..b42a4ec 100644 | ||
20 | --- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp | ||
21 | +++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp | ||
22 | @@ -40,207 +40,34 @@ | ||
23 | // In both cases values with function type used as intermediate representation | ||
24 | // for block literal structure. | ||
25 | // | ||
26 | -// This pass is designed to find such cases and simplify them to avoid any | ||
27 | -// function pointer types occurrences in LLVM IR in 4 steps. | ||
28 | -// | ||
29 | -// 1. Find all function pointer allocas, like | ||
30 | -// %block = alloca void () * | ||
31 | -// | ||
32 | -// Then find a single store to that alloca: | ||
33 | -// %blockLit = alloca <{ i32, i32, ...}>, align 4 | ||
34 | -// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()* | ||
35 | -// > store void ()* %0, void ()** %block, align 4 | ||
36 | -// | ||
37 | -// And replace the alloca users by new instructions which used stored value | ||
38 | -// %blockLit itself instead of function pointer alloca %block. | ||
39 | -// | ||
40 | -// 2. Find consecutive casts from block literal type to i8 addrspace(4)* | ||
41 | -// used function pointers as an intermediate type: | ||
42 | -// %0 = bitcast <{ i32, i32 }> %block to void() * | ||
43 | -// %1 = addrspacecast void() * %0 to i8 addrspace(4)* | ||
44 | -// And simplify them: | ||
45 | -// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)* | ||
46 | -// | ||
47 | -// 3. Find all unused instructions with function pointer type occured after | ||
48 | -// pp.1-2 and remove them. | ||
49 | -// | ||
50 | -// 4. Find unused globals with function pointer type, like | ||
51 | -// @block = constant void ()* | ||
52 | -// bitcast ({ i32, i32 }* @__block_literal_global to void ()* | ||
53 | -// | ||
54 | -// And remove them. | ||
55 | +// In LLVM IR produced by clang, blocks are represented with the following | ||
56 | +// structure: | ||
57 | +// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
58 | +// Pointers to block invoke functions are stored in the third field. Clang | ||
59 | +// replaces indirect function calls in all cases except if block is passed as a | ||
60 | +// function argument. Note that it is somewhat unclear if the OpenCL C spec | ||
61 | +// should allow passing blocks as function arguments. This pass is not supposed | ||
62 | +// to work correctly with such functions. | ||
63 | +// Clang, though, has to store function pointers in this structure. The purpose | ||
64 | +// of this pass is to replace stores of function pointers (not allowed in | ||
65 | +// SPIR-V) with null pointers. | ||
66 | // | ||
67 | //===----------------------------------------------------------------------===// | ||
68 | #define DEBUG_TYPE "spv-lower-ocl-blocks" | ||
69 | |||
70 | -#include "OCLUtil.h" | ||
71 | #include "SPIRVInternal.h" | ||
72 | |||
73 | -#include "llvm/ADT/SetVector.h" | ||
74 | -#include "llvm/Analysis/ValueTracking.h" | ||
75 | -#include "llvm/IR/GlobalVariable.h" | ||
76 | -#include "llvm/IR/InstIterator.h" | ||
77 | #include "llvm/IR/Module.h" | ||
78 | #include "llvm/Pass.h" | ||
79 | -#include "llvm/PassSupport.h" | ||
80 | -#include "llvm/Support/Casting.h" | ||
81 | +#include "llvm/Support/Regex.h" | ||
82 | |||
83 | using namespace llvm; | ||
84 | |||
85 | namespace { | ||
86 | |||
87 | -static void | ||
88 | -removeUnusedFunctionPtrInst(Instruction *I, | ||
89 | - SmallSetVector<Instruction *, 16> &FuncPtrInsts) { | ||
90 | - for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) { | ||
91 | - Instruction *OpI = dyn_cast<Instruction>(I->getOperand(OpIdx)); | ||
92 | - I->setOperand(OpIdx, nullptr); | ||
93 | - if (OpI && OpI != I && OpI->user_empty()) | ||
94 | - FuncPtrInsts.insert(OpI); | ||
95 | - } | ||
96 | - I->eraseFromParent(); | ||
97 | -} | ||
98 | - | ||
99 | -static bool isFuncPtrAlloca(const AllocaInst *AI) { | ||
100 | - auto *ET = dyn_cast<PointerType>(AI->getAllocatedType()); | ||
101 | - return ET && ET->getElementType()->isFunctionTy(); | ||
102 | -} | ||
103 | - | ||
104 | -static bool hasFuncPtrType(const Value *V) { | ||
105 | - auto *PT = dyn_cast<PointerType>(V->getType()); | ||
106 | - return PT && PT->getElementType()->isFunctionTy(); | ||
107 | -} | ||
108 | - | ||
109 | -static bool isFuncPtrInst(const Instruction *I) { | ||
110 | - if (auto *AI = dyn_cast<AllocaInst>(I)) | ||
111 | - return isFuncPtrAlloca(AI); | ||
112 | - | ||
113 | - for (auto &Op : I->operands()) { | ||
114 | - if (auto *AI = dyn_cast<AllocaInst>(Op)) | ||
115 | - return isFuncPtrAlloca(AI); | ||
116 | - | ||
117 | - auto *OpI = dyn_cast<Instruction>(&Op); | ||
118 | - if (OpI && OpI != I && hasFuncPtrType(OpI)) | ||
119 | - return true; | ||
120 | - } | ||
121 | - return false; | ||
122 | -} | ||
123 | - | ||
124 | -static StoreInst *findSingleStore(AllocaInst *AI) { | ||
125 | - StoreInst *Store = nullptr; | ||
126 | - for (auto *U : AI->users()) { | ||
127 | - if (!isa<StoreInst>(U)) | ||
128 | - continue; // not a store | ||
129 | - if (Store) | ||
130 | - return nullptr; // there are more than one stores | ||
131 | - Store = dyn_cast<StoreInst>(U); | ||
132 | - } | ||
133 | - return Store; | ||
134 | -} | ||
135 | - | ||
136 | -static void fixFunctionPtrAllocaUsers(AllocaInst *AI) { | ||
137 | - // Find and remove a single store to alloca | ||
138 | - auto *SingleStore = findSingleStore(AI); | ||
139 | - assert(SingleStore && "More than one store to the function pointer alloca"); | ||
140 | - auto *StoredVal = SingleStore->getValueOperand(); | ||
141 | - SingleStore->eraseFromParent(); | ||
142 | - | ||
143 | - // Find loads from the alloca and replace thier users | ||
144 | - for (auto *U : AI->users()) { | ||
145 | - auto *LI = dyn_cast<LoadInst>(U); | ||
146 | - if (!LI) | ||
147 | - continue; | ||
148 | - | ||
149 | - for (auto *U : LI->users()) { | ||
150 | - auto *UInst = cast<Instruction>(U); | ||
151 | - auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast( | ||
152 | - StoredVal, UInst->getType(), "", UInst); | ||
153 | - UInst->replaceAllUsesWith(Cast); | ||
154 | - } | ||
155 | - } | ||
156 | -} | ||
157 | - | ||
158 | -static int getBlockLiteralIdx(const Function &F) { | ||
159 | - StringRef FName = F.getName(); | ||
160 | - if (isEnqueueKernelBI(FName)) | ||
161 | - return FName.contains("events") ? 7 : 4; | ||
162 | - if (isKernelQueryBI(FName)) | ||
163 | - return FName.contains("for_ndrange") ? 2 : 1; | ||
164 | - if (FName.startswith("__") && FName.contains("_block_invoke")) | ||
165 | - return F.hasStructRetAttr() ? 1 : 0; | ||
166 | - | ||
167 | - return -1; // No block literal argument | ||
168 | -} | ||
169 | - | ||
170 | -static bool hasBlockLiteralArg(const Function &F) { | ||
171 | - return getBlockLiteralIdx(F) != -1; | ||
172 | -} | ||
173 | - | ||
174 | -static bool simplifyFunctionPtrCasts(Function &F) { | ||
175 | - bool Changed = false; | ||
176 | - int BlockLiteralIdx = getBlockLiteralIdx(F); | ||
177 | - for (auto *U : F.users()) { | ||
178 | - auto *Call = dyn_cast<CallInst>(U); | ||
179 | - if (!Call) | ||
180 | - continue; | ||
181 | - if (Call->getFunction()->getName() == F.getName().str() + "_kernel") | ||
182 | - continue; // Skip block invoke function calls inside block invoke kernels | ||
183 | - | ||
184 | - const DataLayout &DL = F.getParent()->getDataLayout(); | ||
185 | - auto *BlockLiteral = Call->getOperand(BlockLiteralIdx); | ||
186 | - auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL); | ||
187 | - if (isa<GlobalVariable>(BlockLiteralVal)) | ||
188 | - continue; // nothing to do with globals | ||
189 | - | ||
190 | - auto *BlockLiteralAlloca = cast<AllocaInst>(BlockLiteralVal); | ||
191 | - assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() && | ||
192 | - "Function type shouldn't be there"); | ||
193 | - | ||
194 | - auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast( | ||
195 | - BlockLiteralAlloca, BlockLiteral->getType(), "", Call); | ||
196 | - BlockLiteral->replaceAllUsesWith(NewBlockLiteral); | ||
197 | - Changed |= true; | ||
198 | - } | ||
199 | - return Changed; | ||
200 | -} | ||
201 | - | ||
202 | -static void | ||
203 | -findFunctionPtrAllocas(Module &M, | ||
204 | - SmallVectorImpl<AllocaInst *> &FuncPtrAllocas) { | ||
205 | - for (auto &F : M) { | ||
206 | - if (F.isDeclaration()) | ||
207 | - continue; | ||
208 | - for (auto &I : instructions(F)) { | ||
209 | - auto *AI = dyn_cast<AllocaInst>(&I); | ||
210 | - if (!AI || !isFuncPtrAlloca(AI)) | ||
211 | - continue; | ||
212 | - FuncPtrAllocas.push_back(AI); | ||
213 | - } | ||
214 | - } | ||
215 | -} | ||
216 | - | ||
217 | -static void | ||
218 | -findUnusedFunctionPtrInsts(Module &M, | ||
219 | - SmallSetVector<Instruction *, 16> &FuncPtrInsts) { | ||
220 | - for (auto &F : M) { | ||
221 | - if (F.isDeclaration()) | ||
222 | - continue; | ||
223 | - for (auto &I : instructions(F)) | ||
224 | - if (I.user_empty() && isFuncPtrInst(&I)) | ||
225 | - FuncPtrInsts.insert(&I); | ||
226 | - } | ||
227 | -} | ||
228 | - | ||
229 | -static void | ||
230 | -findUnusedFunctionPtrGlbs(Module &M, | ||
231 | - SmallVectorImpl<GlobalVariable *> &FuncPtrGlbs) { | ||
232 | - for (auto &GV : M.globals()) { | ||
233 | - if (!GV.user_empty()) | ||
234 | - continue; | ||
235 | - auto *GVType = dyn_cast<PointerType>(GV.getType()->getElementType()); | ||
236 | - if (GVType && GVType->getElementType()->isFunctionTy()) | ||
237 | - FuncPtrGlbs.push_back(&GV); | ||
238 | - } | ||
239 | +static bool isBlockInvoke(Function &F) { | ||
240 | + static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$"); | ||
241 | + return BlockInvokeRegex.match(F.getName()); | ||
242 | } | ||
243 | |||
244 | class SPIRVLowerOCLBlocks : public ModulePass { | ||
245 | @@ -250,44 +77,24 @@ public: | ||
246 | |||
247 | bool runOnModule(Module &M) { | ||
248 | bool Changed = false; | ||
249 | - | ||
250 | - // 1. Find function pointer allocas and fix their users | ||
251 | - SmallVector<AllocaInst *, 16> FuncPtrAllocas; | ||
252 | - findFunctionPtrAllocas(M, FuncPtrAllocas); | ||
253 | - | ||
254 | - Changed |= !FuncPtrAllocas.empty(); | ||
255 | - for (auto *AI : FuncPtrAllocas) | ||
256 | - fixFunctionPtrAllocaUsers(AI); | ||
257 | - | ||
258 | - // 2. Simplify consecutive casts which use function pointer types | ||
259 | - for (auto &F : M) | ||
260 | - if (hasBlockLiteralArg(F)) | ||
261 | - Changed |= simplifyFunctionPtrCasts(F); | ||
262 | - | ||
263 | - // 3. Cleanup unused instructions with function pointer type | ||
264 | - // which are occured after pp. 1-2 | ||
265 | - SmallSetVector<Instruction *, 16> FuncPtrInsts; | ||
266 | - findUnusedFunctionPtrInsts(M, FuncPtrInsts); | ||
267 | - | ||
268 | - Changed |= !FuncPtrInsts.empty(); | ||
269 | - while (!FuncPtrInsts.empty()) { | ||
270 | - Instruction *I = FuncPtrInsts.pop_back_val(); | ||
271 | - removeUnusedFunctionPtrInst(I, FuncPtrInsts); | ||
272 | + for (Function &F : M) { | ||
273 | + if (!isBlockInvoke(F)) | ||
274 | + continue; | ||
275 | + for (User *U : F.users()) { | ||
276 | + if (!isa<Constant>(U)) | ||
277 | + continue; | ||
278 | + Constant *Null = Constant::getNullValue(U->getType()); | ||
279 | + if (U != Null) { | ||
280 | + U->replaceAllUsesWith(Null); | ||
281 | + Changed = true; | ||
282 | + } | ||
283 | + } | ||
284 | } | ||
285 | - | ||
286 | - // 4. Find and remove unused global variables with function pointer type | ||
287 | - SmallVector<GlobalVariable *, 16> FuncPtrGlbs; | ||
288 | - findUnusedFunctionPtrGlbs(M, FuncPtrGlbs); | ||
289 | - | ||
290 | - Changed |= !FuncPtrGlbs.empty(); | ||
291 | - for (auto *GV : FuncPtrGlbs) | ||
292 | - GV->eraseFromParent(); | ||
293 | - | ||
294 | return Changed; | ||
295 | } | ||
296 | |||
297 | static char ID; | ||
298 | -}; // class SPIRVLowerOCLBlocks | ||
299 | +}; | ||
300 | |||
301 | char SPIRVLowerOCLBlocks::ID = 0; | ||
302 | |||
303 | diff --git a/test/global_block.ll b/test/global_block.ll | ||
304 | index 4fc453b..b558213 100644 | ||
305 | --- a/test/global_block.ll | ||
306 | +++ b/test/global_block.ll | ||
307 | @@ -17,7 +17,7 @@ | ||
308 | ; RUN: spirv-val %t.spv | ||
309 | ; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM | ||
310 | |||
311 | -target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" | ||
312 | +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
313 | target triple = "spir-unknown-unknown" | ||
314 | |||
315 | ; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke" | ||
316 | @@ -27,71 +27,56 @@ target triple = "spir-unknown-unknown" | ||
317 | ; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]] | ||
318 | ; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] [[int]] | ||
319 | |||
320 | -;; This variable is not needed in SPIRV | ||
321 | -; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1 | ||
322 | -; CHECK-LLVM-NOT: @block_kernel.b1 | ||
323 | -@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) addrspace(4)*), align 8 | ||
324 | +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
325 | |||
326 | -@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
327 | +@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4 | ||
328 | +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 | ||
329 | |||
330 | -; Function Attrs: convergent nounwind | ||
331 | -define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { | ||
332 | +; Function Attrs: convergent noinline nounwind optnone | ||
333 | +define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { | ||
334 | entry: | ||
335 | - %res.addr = alloca i32 addrspace(1)*, align 8 | ||
336 | - store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa !10 | ||
337 | - | ||
338 | + %res.addr = alloca i32 addrspace(1)*, align 4 | ||
339 | + store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4 | ||
340 | ; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} [[five]] | ||
341 | ; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 5) | ||
342 | - %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2 | ||
343 | - | ||
344 | - %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa !10 | ||
345 | - store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14 | ||
346 | + %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2 | ||
347 | + %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4 | ||
348 | + store i32 %call, i32 addrspace(1)* %0, align 4 | ||
349 | ret void | ||
350 | } | ||
351 | |||
352 | -; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]] | ||
353 | +; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]] | ||
354 | ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}} | ||
355 | ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}} | ||
356 | ; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 %{{.*}}) | ||
357 | -; Function Attrs: convergent nounwind | ||
358 | +; Function Attrs: convergent noinline nounwind optnone | ||
359 | define internal spir_func i32 @_block_invoke(i8 addrspace(4)* %.block_descriptor, i32 %i) #1 { | ||
360 | entry: | ||
361 | - %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8 | ||
362 | + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
363 | %i.addr = alloca i32, align 4 | ||
364 | - store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8 | ||
365 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
366 | - store i32 %i, i32* %i.addr, align 4, !tbaa !14 | ||
367 | - %0 = load i32, i32* %i.addr, align 4, !tbaa !14 | ||
368 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 | ||
369 | + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
370 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
371 | + store i32 %i, i32* %i.addr, align 4 | ||
372 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 | ||
373 | + %0 = load i32, i32* %i.addr, align 4 | ||
374 | %add = add nsw i32 %0, 1 | ||
375 | ret i32 %add | ||
376 | } | ||
377 | |||
378 | -attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
379 | -attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
380 | +attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
381 | +attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
382 | attributes #2 = { convergent } | ||
383 | |||
384 | !llvm.module.flags = !{!0} | ||
385 | -!opencl.enable.FP_CONTRACT = !{} | ||
386 | !opencl.ocl.version = !{!1} | ||
387 | !opencl.spir.version = !{!1} | ||
388 | -!opencl.used.extensions = !{!2} | ||
389 | -!opencl.used.optional.core.features = !{!2} | ||
390 | -!opencl.compiler.options = !{!2} | ||
391 | -!llvm.ident = !{!3} | ||
392 | +!llvm.ident = !{!2} | ||
393 | |||
394 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
395 | !1 = !{i32 2, i32 0} | ||
396 | -!2 = !{} | ||
397 | -!3 = !{!"clang version 7.0.0"} | ||
398 | -!4 = !{i32 1} | ||
399 | -!5 = !{!"none"} | ||
400 | -!6 = !{!"int*"} | ||
401 | -!7 = !{!""} | ||
402 | -!8 = !{i1 false} | ||
403 | -!9 = !{i32 0} | ||
404 | -!10 = !{!11, !11, i64 0} | ||
405 | -!11 = !{!"any pointer", !12, i64 0} | ||
406 | -!12 = !{!"omnipotent char", !13, i64 0} | ||
407 | -!13 = !{!"Simple C/C++ TBAA"} | ||
408 | -!14 = !{!15, !15, i64 0} | ||
409 | -!15 = !{!"int", !12, i64 0} | ||
410 | +!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} | ||
411 | +!3 = !{i32 1} | ||
412 | +!4 = !{!"none"} | ||
413 | +!5 = !{!"int*"} | ||
414 | +!6 = !{!""} | ||
415 | diff --git a/test/literal-struct.ll b/test/literal-struct.ll | ||
416 | index b88187f..dec957a 100644 | ||
417 | --- a/test/literal-struct.ll | ||
418 | +++ b/test/literal-struct.ll | ||
419 | @@ -2,7 +2,7 @@ | ||
420 | ; structs, i.e. structs whose type has no name. Typicaly clang generate such | ||
421 | ; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with | ||
422 | ; the following command: | ||
423 | -; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll | ||
424 | +; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll | ||
425 | |||
426 | ; literal-struct.cl: | ||
427 | ; void foo() | ||
428 | @@ -17,25 +17,28 @@ | ||
429 | ; RUN: llvm-spirv %t.bc -o %t.spv | ||
430 | ; RUN: spirv-val %t.spv | ||
431 | |||
432 | -; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0 | ||
433 | -; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}} | ||
434 | +; CHECK: TypeInt [[Int:[0-9]+]] 32 0 | ||
435 | +; CHECK: TypeInt [[Int8:[0-9]+]] 8 0 | ||
436 | +; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]] | ||
437 | +; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]] | ||
438 | |||
439 | target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
440 | target triple = "spir" | ||
441 | |||
442 | -@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
443 | +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
444 | + | ||
445 | +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 | ||
446 | ; CHECK: ConstantComposite [[StructType]] | ||
447 | |||
448 | -; This is artificial case is added to cover ConstantNull instrucitions with TypeStruct. | ||
449 | -@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } zeroinitializer, align 4 | ||
450 | +@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4 | ||
451 | ; CHECK: ConstantNull [[StructType]] | ||
452 | |||
453 | ; Function Attrs: convergent noinline nounwind optnone | ||
454 | define spir_func void @foo() #0 { | ||
455 | entry: | ||
456 | - %myBlock = alloca void () addrspace(4)*, align 4 | ||
457 | - store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4 | ||
458 | - call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1 | ||
459 | + %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4 | ||
460 | + store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4 | ||
461 | + call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1 | ||
462 | ret void | ||
463 | } | ||
464 | |||
465 | @@ -43,14 +46,14 @@ entry: | ||
466 | define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) #0 { | ||
467 | entry: | ||
468 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
469 | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 | ||
470 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 | ||
471 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
472 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
473 | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 | ||
474 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
475 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 | ||
476 | ret void | ||
477 | } | ||
478 | |||
479 | -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
480 | +attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
481 | attributes #1 = { convergent } | ||
482 | |||
483 | !llvm.module.flags = !{!0} | ||
484 | @@ -60,4 +63,4 @@ attributes #1 = { convergent } | ||
485 | |||
486 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
487 | !1 = !{i32 2, i32 0} | ||
488 | -!2 = !{!"clang version 8.0.0 "} | ||
489 | +!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} | ||
490 | diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll | ||
491 | index a68820f..ebd2c5f 100644 | ||
492 | --- a/test/transcoding/block_w_struct_return.ll | ||
493 | +++ b/test/transcoding/block_w_struct_return.ll | ||
494 | @@ -16,6 +16,8 @@ | ||
495 | ; res[tid] = kernelBlock(aa).a - 6; | ||
496 | ; } | ||
497 | |||
498 | +; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll | ||
499 | + | ||
500 | ; RUN: llvm-as %s -o %t.bc | ||
501 | ; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt | ||
502 | ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV | ||
503 | @@ -28,12 +30,14 @@ | ||
504 | ; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke" | ||
505 | |||
506 | ; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32 | ||
507 | +; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8 | ||
508 | +; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]] | ||
509 | ; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]] | ||
510 | ; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]] | ||
511 | |||
512 | ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7 | ||
513 | ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7 | ||
514 | -; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}} | ||
515 | +; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}} | ||
516 | ; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] [[StructRet]] [[BlockLit]] [[StructArg]] | ||
517 | |||
518 | ; CHECK-LLVM: %[[StructA:.*]] = type { i32 } | ||
519 | @@ -42,20 +46,21 @@ | ||
520 | target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
521 | target triple = "spir64-unknown-unknown" | ||
522 | |||
523 | +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
524 | %struct.A = type { i32 } | ||
525 | |||
526 | -@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
527 | +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8 | ||
528 | |||
529 | ; Function Attrs: convergent noinline nounwind optnone | ||
530 | -define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible !8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location !7 { | ||
531 | +define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { | ||
532 | entry: | ||
533 | %res.addr = alloca i32 addrspace(1)*, align 8 | ||
534 | - %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8 | ||
535 | + %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 8 | ||
536 | %tid = alloca i64, align 8 | ||
537 | %aa = alloca %struct.A, align 4 | ||
538 | %tmp = alloca %struct.A, align 4 | ||
539 | store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8 | ||
540 | - store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void (%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void (%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) addrspace(4)** %kernelBlock, align 8 | ||
541 | + store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8 | ||
542 | %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4 | ||
543 | store i64 %call, i64* %tid, align 8 | ||
544 | %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8 | ||
545 | @@ -64,7 +69,7 @@ entry: | ||
546 | store i32 -1, i32 addrspace(1)* %arrayidx, align 4 | ||
547 | %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0 | ||
548 | store i32 5, i32* %a, align 4 | ||
549 | - call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5 | ||
550 | + call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5 | ||
551 | %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0 | ||
552 | %2 = load i32, i32* %a1, align 4 | ||
553 | %sub = sub nsw i32 %2, 6 | ||
554 | @@ -79,10 +84,10 @@ entry: | ||
555 | define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval align 4 %a) #1 { | ||
556 | entry: | ||
557 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8 | ||
558 | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8 | ||
559 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8 | ||
560 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8 | ||
561 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
562 | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 8 | ||
563 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
564 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8 | ||
565 | %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0 | ||
566 | store i32 6, i32* %a1, align 4 | ||
567 | %0 = bitcast %struct.A* %agg.result to i8* | ||
568 | @@ -97,30 +102,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r | ||
569 | ; Function Attrs: convergent nounwind readnone | ||
570 | declare spir_func i64 @_Z13get_global_idj(i32) #3 | ||
571 | |||
572 | -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
573 | -attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
574 | +attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
575 | +attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
576 | attributes #2 = { argmemonly nounwind } | ||
577 | attributes #3 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
578 | attributes #4 = { convergent nounwind readnone } | ||
579 | attributes #5 = { convergent } | ||
580 | |||
581 | !llvm.module.flags = !{!0} | ||
582 | -!opencl.enable.FP_CONTRACT = !{} | ||
583 | !opencl.ocl.version = !{!1} | ||
584 | !opencl.spir.version = !{!1} | ||
585 | -!opencl.used.extensions = !{!2} | ||
586 | -!opencl.used.optional.core.features = !{!2} | ||
587 | -!opencl.compiler.options = !{!2} | ||
588 | -!llvm.ident = !{!3} | ||
589 | +!llvm.ident = !{!2} | ||
590 | |||
591 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
592 | !1 = !{i32 2, i32 0} | ||
593 | -!2 = !{} | ||
594 | -!3 = !{!"clang version 7.0.0"} | ||
595 | -!4 = !{i32 1} | ||
596 | -!5 = !{!"none"} | ||
597 | -!6 = !{!"int*"} | ||
598 | -!7 = !{!""} | ||
599 | -!8 = !{i1 false} | ||
600 | -!9 = !{i32 0} | ||
601 | - | ||
602 | +!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} | ||
603 | +!3 = !{i32 1} | ||
604 | +!4 = !{!"none"} | ||
605 | +!5 = !{!"int*"} | ||
606 | +!6 = !{!""} | ||
607 | diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll | ||
608 | index 1f0b360..761043e 100644 | ||
609 | --- a/test/transcoding/enqueue_kernel.ll | ||
610 | +++ b/test/transcoding/enqueue_kernel.ll | ||
611 | @@ -51,11 +51,12 @@ | ||
612 | ; ModuleID = 'enqueue_kernel.cl' | ||
613 | source_filename = "enqueue_kernel.cl" | ||
614 | target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
615 | -target triple = "spir-unknown-unknown" | ||
616 | +target triple = "spir" | ||
617 | |||
618 | %opencl.queue_t = type opaque | ||
619 | %struct.ndrange_t = type { i32 } | ||
620 | %opencl.clk_event_t = type opaque | ||
621 | +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } | ||
622 | |||
623 | ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel" | ||
624 | ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel" | ||
625 | @@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown" | ||
626 | |||
627 | ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32 | ||
628 | ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8 | ||
629 | -; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8 | ||
630 | ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0 | ||
631 | -; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17 | ||
632 | +; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21 | ||
633 | ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2 | ||
634 | -; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20 | ||
635 | -; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]] | ||
636 | +; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8 | ||
637 | +; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24 | ||
638 | |||
639 | ; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}} | ||
640 | +; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]] | ||
641 | +; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]] | ||
642 | ; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]] | ||
643 | ; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]] | ||
644 | -; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]] | ||
645 | ; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]] | ||
646 | ; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] | ||
647 | ; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] | ||
648 | ; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] | ||
649 | ; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]] | ||
650 | |||
651 | -; CHECK-LLVM: [[BlockTy1:%[0-9a-z\.]+]] = type { i32, i32 } | ||
652 | -; CHECK-LLVM: [[BlockTy2:%[0-9a-z\.]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }> | ||
653 | -; CHECK-LLVM: [[BlockTy3:%[0-9a-z\.]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> | ||
654 | -; CHECK-LLVM: [[BlockTy4:%[0-9a-z\.]+]] = type <{ i32, i32 }> | ||
655 | +; CHECK-LLVM: [[BlockTy1:%[0-9a-z\.]+]] = type { i32, i32, i8 addrspace(4)* } | ||
656 | +; CHECK-LLVM: [[BlockTy2:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> | ||
657 | +; CHECK-LLVM: [[BlockTy3:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> | ||
658 | +; CHECK-LLVM: [[BlockTy4:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)* }> | ||
659 | |||
660 | -; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4 | ||
661 | -; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4 | ||
662 | +; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 | ||
663 | +; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 | ||
664 | |||
665 | -@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
666 | -@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 | ||
667 | +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4 | ||
668 | +@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4 | ||
669 | |||
670 | ; Function Attrs: convergent noinline nounwind optnone | ||
671 | -define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { | ||
672 | +define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { | ||
673 | entry: | ||
674 | + %a.addr = alloca i32 addrspace(1)*, align 4 | ||
675 | + %b.addr = alloca i32 addrspace(1)*, align 4 | ||
676 | + %i.addr = alloca i32, align 4 | ||
677 | + %c0.addr = alloca i8, align 1 | ||
678 | %default_queue = alloca %opencl.queue_t*, align 4 | ||
679 | %flags = alloca i32, align 4 | ||
680 | %ndrange = alloca %struct.ndrange_t, align 4 | ||
681 | %clk_event = alloca %opencl.clk_event_t*, align 4 | ||
682 | %event_wait_list = alloca %opencl.clk_event_t*, align 4 | ||
683 | %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4 | ||
684 | - %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4 | ||
685 | - %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4 | ||
686 | + %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4 | ||
687 | + %tmp = alloca %struct.ndrange_t, align 4 | ||
688 | + %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4 | ||
689 | + %tmp4 = alloca %struct.ndrange_t, align 4 | ||
690 | %c = alloca i8, align 1 | ||
691 | + %tmp11 = alloca %struct.ndrange_t, align 4 | ||
692 | + %block_sizes = alloca [1 x i32], align 4 | ||
693 | + %tmp12 = alloca %struct.ndrange_t, align 4 | ||
694 | + %block_sizes13 = alloca [3 x i32], align 4 | ||
695 | + store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4 | ||
696 | + store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4 | ||
697 | + store i32 %i, i32* %i.addr, align 4 | ||
698 | + store i8 %c0, i8* %c0.addr, align 1 | ||
699 | store i32 0, i32* %flags, align 4 | ||
700 | %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 | ||
701 | %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4 | ||
702 | store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4 | ||
703 | %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 | ||
704 | %2 = load i32, i32* %flags, align 4 | ||
705 | - %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0 | ||
706 | - store i32 17, i32* %block.size, align 4 | ||
707 | - %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1 | ||
708 | + %3 = bitcast %struct.ndrange_t* %tmp to i8* | ||
709 | + %4 = bitcast %struct.ndrange_t* %ndrange to i8* | ||
710 | + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false) | ||
711 | + %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0 | ||
712 | + store i32 21, i32* %block.size, align 4 | ||
713 | + %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1 | ||
714 | store i32 4, i32* %block.align, align 4 | ||
715 | - %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2 | ||
716 | - store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4 | ||
717 | - %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3 | ||
718 | - store i32 %i, i32* %block.captured1, align 4 | ||
719 | - %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4 | ||
720 | - store i8 %c0, i8* %block.captured2, align 4 | ||
721 | - %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()* | ||
722 | - %4 = addrspacecast void ()* %3 to i8 addrspace(4)* | ||
723 | + %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2 | ||
724 | + store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4 | ||
725 | + %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3 | ||
726 | + %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4 | ||
727 | + store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4 | ||
728 | + %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4 | ||
729 | + %6 = load i32, i32* %i.addr, align 4 | ||
730 | + store i32 %6, i32* %block.captured1, align 4 | ||
731 | + %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5 | ||
732 | + %7 = load i8, i8* %c0.addr, align 1 | ||
733 | + store i8 %7, i8* %block.captured2, align 4 | ||
734 | + %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic* | ||
735 | + %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)* | ||
736 | |||
737 | ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]] | ||
738 | ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} | ||
739 | ; [[ConstInt0]] [[EventNull]] [[EventNull]] | ||
740 | ; [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]] | ||
741 | |||
742 | -; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 addrspace(4)* | ||
743 | +; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic* | ||
744 | +; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)* | ||
745 | ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)* | ||
746 | -; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]]) | ||
747 | - | ||
748 | - %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4) | ||
749 | - %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)* | ||
750 | - %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* | ||
751 | - %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0 | ||
752 | - store i32 20, i32* %block.size5, align 4 | ||
753 | - %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1 | ||
754 | +; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]]) | ||
755 | + | ||
756 | + %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9) | ||
757 | + %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 | ||
758 | + %12 = load i32, i32* %flags, align 4 | ||
759 | + %13 = bitcast %struct.ndrange_t* %tmp4 to i8* | ||
760 | + %14 = bitcast %struct.ndrange_t* %ndrange to i8* | ||
761 | + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false) | ||
762 | + %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)* | ||
763 | + %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* | ||
764 | + %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0 | ||
765 | + store i32 24, i32* %block.size5, align 4 | ||
766 | + %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1 | ||
767 | store i32 4, i32* %block.align6, align 4 | ||
768 | - %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2 | ||
769 | - store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4 | ||
770 | - %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3 | ||
771 | - store i32 %i, i32* %block.captured8, align 4 | ||
772 | - %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4 | ||
773 | - store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4 | ||
774 | - %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to void ()* | ||
775 | - %9 = addrspacecast void ()* %8 to i8 addrspace(4)* | ||
776 | + %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2 | ||
777 | + store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4 | ||
778 | + %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3 | ||
779 | + %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4 | ||
780 | + store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4 | ||
781 | + %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4 | ||
782 | + %18 = load i32, i32* %i.addr, align 4 | ||
783 | + store i32 %18, i32* %block.captured9, align 4 | ||
784 | + %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5 | ||
785 | + %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4 | ||
786 | + store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4 | ||
787 | + %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic* | ||
788 | + %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)* | ||
789 | + | ||
790 | |||
791 | ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]] | ||
792 | ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]] | ||
793 | @@ -158,16 +193,24 @@ entry: | ||
794 | ; [[ConstInt2]] [[Event1]] [[Event2]] | ||
795 | ; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]] | ||
796 | |||
797 | -; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 addrspace(4)* | ||
798 | +; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to %struct.__opencl_block_literal_generic* | ||
799 | +; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4) | ||
800 | ; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)* | ||
801 | -; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]]) | ||
802 | - | ||
803 | - %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9) | ||
804 | - %11 = alloca [1 x i32] | ||
805 | - %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0 | ||
806 | - %13 = load i8, i8* %c, align 1 | ||
807 | - %14 = zext i8 %13 to i32 | ||
808 | - store i32 %14, i32* %12, align 4 | ||
809 | +; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]]) | ||
810 | + | ||
811 | + %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21) | ||
812 | + %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 | ||
813 | + %24 = load i32, i32* %flags, align 4 | ||
814 | + %25 = bitcast %struct.ndrange_t* %tmp11 to i8* | ||
815 | + %26 = bitcast %struct.ndrange_t* %ndrange to i8* | ||
816 | + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false) | ||
817 | + %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 | ||
818 | + %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)* | ||
819 | + %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* | ||
820 | + %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0 | ||
821 | + %30 = load i8, i8* %c, align 1 | ||
822 | + %31 = zext i8 %30 to i32 | ||
823 | + store i32 %31, i32* %29, align 4 | ||
824 | |||
825 | ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]] | ||
826 | ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]] | ||
827 | @@ -182,14 +225,18 @@ entry: | ||
828 | ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)* | ||
829 | ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}}) | ||
830 | |||
831 | - %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %12) | ||
832 | - %16 = alloca [3 x i32] | ||
833 | - %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0 | ||
834 | - store i32 1, i32* %17, align 4 | ||
835 | - %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1 | ||
836 | - store i32 2, i32* %18, align 4 | ||
837 | - %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2 | ||
838 | - store i32 4, i32* %19, align 4 | ||
839 | + %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29) | ||
840 | + %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 | ||
841 | + %34 = load i32, i32* %flags, align 4 | ||
842 | + %35 = bitcast %struct.ndrange_t* %tmp12 to i8* | ||
843 | + %36 = bitcast %struct.ndrange_t* %ndrange to i8* | ||
844 | + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false) | ||
845 | + %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0 | ||
846 | + store i32 1, i32* %37, align 4 | ||
847 | + %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1 | ||
848 | + store i32 2, i32* %38, align 4 | ||
849 | + %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2 | ||
850 | + store i32 4, i32* %39, align 4 | ||
851 | |||
852 | ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]] | ||
853 | ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]] | ||
854 | @@ -206,24 +253,27 @@ entry: | ||
855 | ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)* | ||
856 | ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}}) | ||
857 | |||
858 | - %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17) | ||
859 | + %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37) | ||
860 | ret void | ||
861 | } | ||
862 | |||
863 | +; Function Attrs: argmemonly nounwind | ||
864 | +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1 | ||
865 | + | ||
866 | ; Function Attrs: convergent noinline nounwind optnone | ||
867 | define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #2 { | ||
868 | entry: | ||
869 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
870 | - %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4 | ||
871 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4 | ||
872 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
873 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* | ||
874 | - store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4 | ||
875 | - %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4 | ||
876 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* | ||
877 | + store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4 | ||
878 | + %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5 | ||
879 | %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4 | ||
880 | %conv = sext i8 %0 to i32 | ||
881 | - %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 2 | ||
882 | + %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3 | ||
883 | %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4 | ||
884 | - %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3 | ||
885 | + %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4 | ||
886 | %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4 | ||
887 | %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2 | ||
888 | store i32 %conv, i32 addrspace(1)* %arrayidx, align 4 | ||
889 | @@ -243,19 +293,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i | ||
890 | define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 { | ||
891 | entry: | ||
892 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
893 | - %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4 | ||
894 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4 | ||
895 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
896 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* | ||
897 | - store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4 | ||
898 | - %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 | ||
899 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* | ||
900 | + store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4 | ||
901 | + %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5 | ||
902 | %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4 | ||
903 | - %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 | ||
904 | + %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 | ||
905 | %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4 | ||
906 | %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1 | ||
907 | %2 = load i32, i32 addrspace(1)* %arrayidx, align 4 | ||
908 | - %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2 | ||
909 | + %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 | ||
910 | %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4 | ||
911 | - %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 | ||
912 | + %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 | ||
913 | %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4 | ||
914 | %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4 | ||
915 | store i32 %2, i32 addrspace(1)* %arrayidx4, align 4 | ||
916 | @@ -276,11 +326,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac | ||
917 | entry: | ||
918 | %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 | ||
919 | %p.addr = alloca i8 addrspace(3)*, align 4 | ||
920 | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 | ||
921 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 | ||
922 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
923 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
924 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
925 | store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4 | ||
926 | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 | ||
927 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 | ||
928 | ret void | ||
929 | } | ||
930 | |||
931 | @@ -300,13 +350,13 @@ entry: | ||
932 | %p1.addr = alloca i8 addrspace(3)*, align 4 | ||
933 | %p2.addr = alloca i8 addrspace(3)*, align 4 | ||
934 | %p3.addr = alloca i8 addrspace(3)*, align 4 | ||
935 | - %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 | ||
936 | + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 | ||
937 | store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 | ||
938 | - %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* | ||
939 | + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* | ||
940 | store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4 | ||
941 | store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4 | ||
942 | store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4 | ||
943 | - store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 | ||
944 | + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 | ||
945 | ret void | ||
946 | } | ||
947 | |||
948 | @@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*, | ||
949 | ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*) | ||
950 | ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*) | ||
951 | |||
952 | -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
953 | +attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
954 | attributes #1 = { argmemonly nounwind } | ||
955 | -attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
956 | +attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
957 | attributes #3 = { nounwind } | ||
958 | |||
959 | !llvm.module.flags = !{!0} | ||
960 | -!opencl.enable.FP_CONTRACT = !{} | ||
961 | !opencl.ocl.version = !{!1} | ||
962 | !opencl.spir.version = !{!1} | ||
963 | -!opencl.used.extensions = !{!2} | ||
964 | -!opencl.used.optional.core.features = !{!2} | ||
965 | -!opencl.compiler.options = !{!2} | ||
966 | -!llvm.ident = !{!3} | ||
967 | +!llvm.ident = !{!2} | ||
968 | |||
969 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
970 | !1 = !{i32 2, i32 0} | ||
971 | -!2 = !{} | ||
972 | -!3 = !{!"clang version 7.0.0"} | ||
973 | -!4 = !{i32 1, i32 1, i32 0, i32 0} | ||
974 | -!5 = !{!"none", !"none", !"none", !"none"} | ||
975 | -!6 = !{!"int*", !"int*", !"int", !"char"} | ||
976 | -!7 = !{!"", !"", !"", !""} | ||
977 | -!8 = !{i1 false, i1 false, i1 false, i1 false} | ||
978 | -!9 = !{i32 0, i32 0, i32 0, i32 0} | ||
979 | +!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} | ||
980 | +!3 = !{i32 1, i32 1, i32 0, i32 0} | ||
981 | +!4 = !{!"none", !"none", !"none", !"none"} | ||
982 | +!5 = !{!"int*", !"int*", !"int", !"char"} | ||
983 | +!6 = !{!"", !"", !"", !""} | ||
984 | -- | ||
985 | 2.7.4 | ||
986 | |||