diff options
Diffstat (limited to 'dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch')
-rw-r--r-- | dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch | 294 |
1 files changed, 0 insertions, 294 deletions
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch deleted file mode 100644 index 2e935a13..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/0002-OpenCL-Simplify-LLVM-IR-generated-for-OpenCL-blocks.patch +++ /dev/null | |||
@@ -1,294 +0,0 @@ | |||
1 | From c94ec28600255098ffb9d73d1b386a7c8a535590 Mon Sep 17 00:00:00 2001 | ||
2 | From: Andrew Savonichev <andrew.savonichev@intel.com> | ||
3 | Date: Thu, 21 Feb 2019 11:02:10 +0000 | ||
4 | Subject: [PATCH 2/2] [OpenCL] Simplify LLVM IR generated for OpenCL blocks | ||
5 | |||
6 | Summary: | ||
7 | Emit direct call of block invoke functions when possible, i.e. in case the | ||
8 | block is not passed as a function argument. | ||
9 | Also doing some refactoring of `CodeGenFunction::EmitBlockCallExpr()` | ||
10 | |||
11 | Reviewers: Anastasia, yaxunl, svenvh | ||
12 | |||
13 | Reviewed By: Anastasia | ||
14 | |||
15 | Subscribers: cfe-commits | ||
16 | |||
17 | Tags: #clang | ||
18 | |||
19 | Differential Revision: https://reviews.llvm.org/D58388 | ||
20 | |||
21 | git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354568 91177308-0d34-0410-b5e6-96231b3b80d8 | ||
22 | |||
23 | Upstream-Status: Backport | ||
24 | [https://github.com/llvm-mirror/clang/commit/eae71f8d05ce550c4e2595c9b7082cc2c7882c58] | ||
25 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
26 | --- | ||
27 | lib/CodeGen/CGBlocks.cpp | 77 +++++++++++++------------- | ||
28 | lib/CodeGen/CGOpenCLRuntime.cpp | 30 +++++++--- | ||
29 | lib/CodeGen/CGOpenCLRuntime.h | 4 ++ | ||
30 | test/CodeGenOpenCL/blocks.cl | 10 +--- | ||
31 | test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 34 +++++++++--- | ||
32 | 5 files changed, 91 insertions(+), 64 deletions(-) | ||
33 | |||
34 | diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp | ||
35 | index fa3c3ee..10a0238 100644 | ||
36 | --- a/lib/CodeGen/CGBlocks.cpp | ||
37 | +++ b/lib/CodeGen/CGBlocks.cpp | ||
38 | @@ -1261,52 +1261,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, | ||
39 | ReturnValueSlot ReturnValue) { | ||
40 | const BlockPointerType *BPT = | ||
41 | E->getCallee()->getType()->getAs<BlockPointerType>(); | ||
42 | - | ||
43 | llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); | ||
44 | - | ||
45 | - // Get a pointer to the generic block literal. | ||
46 | - // For OpenCL we generate generic AS void ptr to be able to reuse the same | ||
47 | - // block definition for blocks with captures generated as private AS local | ||
48 | - // variables and without captures generated as global AS program scope | ||
49 | - // variables. | ||
50 | - unsigned AddrSpace = 0; | ||
51 | - if (getLangOpts().OpenCL) | ||
52 | - AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); | ||
53 | - | ||
54 | - llvm::Type *BlockLiteralTy = | ||
55 | - llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); | ||
56 | - | ||
57 | - // Bitcast the callee to a block literal. | ||
58 | - BlockPtr = | ||
59 | - Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); | ||
60 | - | ||
61 | - // Get the function pointer from the literal. | ||
62 | - llvm::Value *FuncPtr = | ||
63 | - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, | ||
64 | - CGM.getLangOpts().OpenCL ? 2 : 3); | ||
65 | - | ||
66 | - // Add the block literal. | ||
67 | + llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType(); | ||
68 | + llvm::Value *Func = nullptr; | ||
69 | + QualType FnType = BPT->getPointeeType(); | ||
70 | + ASTContext &Ctx = getContext(); | ||
71 | CallArgList Args; | ||
72 | |||
73 | - QualType VoidPtrQualTy = getContext().VoidPtrTy; | ||
74 | - llvm::Type *GenericVoidPtrTy = VoidPtrTy; | ||
75 | if (getLangOpts().OpenCL) { | ||
76 | - GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType(); | ||
77 | - VoidPtrQualTy = | ||
78 | - getContext().getPointerType(getContext().getAddrSpaceQualType( | ||
79 | - getContext().VoidTy, LangAS::opencl_generic)); | ||
80 | - } | ||
81 | - | ||
82 | - BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy); | ||
83 | - Args.add(RValue::get(BlockPtr), VoidPtrQualTy); | ||
84 | - | ||
85 | - QualType FnType = BPT->getPointeeType(); | ||
86 | + | // For OpenCL, BlockPtr is already cast to a generic block literal. | ||
87 | + | ||
88 | + // First argument of a block call is a generic block literal casted to | ||
89 | + // generic void pointer, i.e. i8 addrspace(4)* | ||
90 | + llvm::Value *BlockDescriptor = Builder.CreatePointerCast( | ||
91 | + BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType()); | ||
92 | + QualType VoidPtrQualTy = Ctx.getPointerType( | ||
93 | + Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic)); | ||
94 | + Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy); | ||
95 | + // And the rest of the arguments. | ||
96 | + EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); | ||
97 | + | ||
98 | + // We *can* call the block directly unless it is a function argument. | ||
99 | + if (!isa<ParmVarDecl>(E->getCalleeDecl())) | ||
100 | + Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); | ||
101 | + else { | ||
102 | + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2); | ||
103 | + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); | ||
104 | + } | ||
105 | + } else { | ||
106 | + // Bitcast the block literal to a generic block literal. | ||
107 | + BlockPtr = Builder.CreatePointerCast( | ||
108 | + BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal"); | ||
109 | + // Get pointer to the block invoke function | ||
110 | + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3); | ||
111 | |||
112 | - // And the rest of the arguments. | ||
113 | - EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); | ||
114 | + | // First argument is a block literal cast to a void pointer | ||
115 | + BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy); | ||
116 | + Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy); | ||
117 | + // And the rest of the arguments. | ||
118 | + EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); | ||
119 | |||
120 | - // Load the function. | ||
121 | - llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); | ||
122 | + // Load the function. | ||
123 | + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); | ||
124 | + } | ||
125 | |||
126 | const FunctionType *FuncTy = FnType->castAs<FunctionType>(); | ||
127 | const CGFunctionInfo &FnInfo = | ||
128 | diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp | ||
129 | index 7f6f595..75003e5 100644 | ||
130 | --- a/lib/CodeGen/CGOpenCLRuntime.cpp | ||
131 | +++ b/lib/CodeGen/CGOpenCLRuntime.cpp | ||
132 | @@ -123,6 +123,23 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() { | ||
133 | CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); | ||
134 | } | ||
135 | |||
136 | +// Get the block literal from an expression derived from the block expression. | ||
137 | +// OpenCL v2.0 s6.12.5: | ||
138 | +// Block variable declarations are implicitly qualified with const. Therefore | ||
139 | +// all block variables must be initialized at declaration time and may not be | ||
140 | +// reassigned. | ||
141 | +static const BlockExpr *getBlockExpr(const Expr *E) { | ||
142 | + | const Expr *Prev = nullptr; // guards against an infinite loop. | ||
143 | + while(!isa<BlockExpr>(E) && E != Prev) { | ||
144 | + Prev = E; | ||
145 | + E = E->IgnoreCasts(); | ||
146 | + if (auto DR = dyn_cast<DeclRefExpr>(E)) { | ||
147 | + E = cast<VarDecl>(DR->getDecl())->getInit(); | ||
148 | + } | ||
149 | + } | ||
150 | + return cast<BlockExpr>(E); | ||
151 | +} | ||
152 | + | ||
153 | /// Record emitted llvm invoke function and llvm block literal for the | ||
154 | /// corresponding block expression. | ||
155 | void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, | ||
156 | @@ -137,20 +154,17 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, | ||
157 | EnqueuedBlockMap[E].Kernel = nullptr; | ||
158 | } | ||
159 | |||
160 | +llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) { | ||
161 | + return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc; | ||
162 | +} | ||
163 | + | ||
164 | CGOpenCLRuntime::EnqueuedBlockInfo | ||
165 | CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { | ||
166 | CGF.EmitScalarExpr(E); | ||
167 | |||
168 | // The block literal may be assigned to a const variable. Chasing down | ||
169 | // to get the block literal. | ||
170 | - if (auto DR = dyn_cast<DeclRefExpr>(E)) { | ||
171 | - E = cast<VarDecl>(DR->getDecl())->getInit(); | ||
172 | - } | ||
173 | - E = E->IgnoreImplicit(); | ||
174 | - if (auto Cast = dyn_cast<CastExpr>(E)) { | ||
175 | - E = Cast->getSubExpr(); | ||
176 | - } | ||
177 | - auto *Block = cast<BlockExpr>(E); | ||
178 | + const BlockExpr *Block = getBlockExpr(E); | ||
179 | |||
180 | assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && | ||
181 | "Block expression not emitted"); | ||
182 | diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h | ||
183 | index 750721f..4effc7e 100644 | ||
184 | --- a/lib/CodeGen/CGOpenCLRuntime.h | ||
185 | +++ b/lib/CodeGen/CGOpenCLRuntime.h | ||
186 | @@ -92,6 +92,10 @@ public: | ||
187 | /// \param Block block literal emitted for the block expression. | ||
188 | void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, | ||
189 | llvm::Value *Block); | ||
190 | + | ||
191 | + /// \return LLVM block invoke function emitted for an expression derived from | ||
192 | + /// the block expression. | ||
193 | + llvm::Function *getInvokeFunction(const Expr *E); | ||
194 | }; | ||
195 | |||
196 | } | ||
197 | diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl | ||
198 | index 19aacc3..ab5a2c6 100644 | ||
199 | --- a/test/CodeGenOpenCL/blocks.cl | ||
200 | +++ b/test/CodeGenOpenCL/blocks.cl | ||
201 | @@ -39,11 +39,8 @@ void foo(){ | ||
202 | // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* | ||
203 | // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]], | ||
204 | // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]] | ||
205 | - // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 | ||
206 | // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* | ||
207 | - // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]] | ||
208 | - // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)* | ||
209 | - // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]]) | ||
210 | + // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]]) | ||
211 | // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2 | ||
212 | // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]] | ||
213 | // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3 | ||
214 | @@ -53,11 +50,8 @@ void foo(){ | ||
215 | // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic* | ||
216 | // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]], | ||
217 | // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]] | ||
218 | - // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2 | ||
219 | // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8* | ||
220 | - // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]] | ||
221 | - // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)* | ||
222 | - // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]]) | ||
223 | + // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]]) | ||
224 | |||
225 | int (^ block_B)(void) = ^{ | ||
226 | return i; | ||
227 | diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
228 | index 8445016..1566912 100644 | ||
229 | --- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
230 | +++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl | ||
231 | @@ -312,9 +312,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
232 | }; | ||
233 | |||
234 | // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. | ||
235 | - // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) | ||
236 | - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* | ||
237 | - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
238 | + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
239 | block_A(); | ||
240 | |||
241 | // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]]. | ||
242 | @@ -333,15 +331,35 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
243 | unsigned size = get_kernel_work_group_size(block_A); | ||
244 | |||
245 | // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted. | ||
246 | - // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) | ||
247 | - // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* | ||
248 | - // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
249 | + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
250 | block_A(); | ||
251 | |||
252 | + | // Make sure that block invoke function is resolved correctly after a sequence of assignments. | ||
253 | + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* | ||
254 | + // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* | ||
255 | + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) | ||
256 | + // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*), | ||
257 | + // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b1, | ||
258 | + bl_t b1 = block_G; | ||
259 | + // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* | ||
260 | + // COMMON-SAME: addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* | ||
261 | + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) | ||
262 | + // COMMON-SAME: to %struct.__opencl_block_literal_generic addrspace(4)*), | ||
263 | + // COMMON-SAME: %struct.__opencl_block_literal_generic addrspace(4)** %b2, | ||
264 | + bl_t b2 = b1; | ||
265 | + // COMMON: call spir_func void @block_G_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* | ||
266 | + // COMMON-SAME: bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) | ||
267 | + // COOMON-SAME: to i8 addrspace(4)*), i8 addrspace(3)* null) | ||
268 | + b2(0); | ||
269 | + // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]]. | ||
270 | + // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl( | ||
271 | + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), | ||
272 | + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) | ||
273 | + size = get_kernel_preferred_work_group_size_multiple(b2); | ||
274 | + | ||
275 | void (^block_C)(void) = ^{ | ||
276 | callee(i, a); | ||
277 | }; | ||
278 | - | ||
279 | // Emits block literal on stack and block kernel [[INVLK3]]. | ||
280 | // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke | ||
281 | // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue | ||
282 | @@ -404,8 +422,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { | ||
283 | // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}}) | ||
284 | // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}}) | ||
285 | // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}}) | ||
286 | +// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) | ||
287 | // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}}) | ||
288 | // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) | ||
289 | -// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) | ||
290 | // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}}) | ||
291 | // COMMON: define internal spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}}) | ||
292 | -- | ||
293 | 1.8.3.1 | ||
294 | |||