Diffstat (limited to 'dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch')
-rw-r--r-- dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch | 173
1 file changed, 0 insertions(+), 173 deletions(-)
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
deleted file mode 100644
index 03b40e9b..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
+++ /dev/null
@@ -1,173 +0,0 @@
From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:15:09 +0800
Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via
 getRegisterTypeForCallingConv rather than using CCPromoteToType in the td
 file

Previously we tried to promote these to xmm/ymm/zmm by promoting
in the X86CallingConv.td file. But this breaks when we run out
of xmm/ymm/zmm registers and need to fall back to memory. We end
up trying to create a nonsensical scalar-to-vector conversion. This
leads to an assertion. The new tests in avx512-calling-conv.ll all
trigger this assertion.

Since we really want to treat these types like we do on avx2,
it seems better to promote them before the calling convention
code gets involved, except when the calling convention is one
that passes the vXi1 type in a k register.

The changes in avx512-regcall-Mask.ll are because we indicated
that xmm/ymm/zmm types should be passed indirectly for the
Win64 ABI before we got to the common lines that promoted the
vXi1 types. This caused the promoted types to be picked up by
the default calling convention code. Now we promote them earlier
so they get passed indirectly as though they were xmm/ymm/zmm.

Differential Revision: https://reviews.llvm.org/D75154

Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/eadea7868f5b7542ee6bdcd9a975697a0c919ffc]

Signed-off-by: Craig Topper <craig.topper@intel.com>
Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++--------
 1 file changed, 61 insertions(+), 29 deletions(-)

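[Editor's note] To make the promotion table in the new handleMaskRegisterForCallingConv helper below easier to scan, here is a minimal standalone C++ sketch of the same mapping. This is not LLVM code: the Mapping struct and mapMaskForCC function are invented for illustration, and the sketch bakes in one set of assumptions (BWI available, 512-bit registers enabled, and a calling convention that does not use k registers).

#include <cstdio>
#include <initializer_list>

// Illustrative stand-in for LLVM's result pair: the register type a
// vNi1 mask is promoted to, and how many such registers it needs.
struct Mapping {
  const char *RegisterVT;
  unsigned NumRegisters;
};

// Sketch of the vNi1 -> xmm/ymm/zmm promotion table, assuming no
// k-register calling convention, hasBWI(), and useAVX512Regs().
static Mapping mapMaskForCC(unsigned NumElts) {
  if (NumElts == 2)  return {"v2i64", 1};  // one xmm
  if (NumElts == 4)  return {"v4i32", 1};  // one xmm
  if (NumElts == 8)  return {"v8i16", 1};  // one xmm
  if (NumElts == 16) return {"v16i8", 1};  // one xmm
  if (NumElts == 32) return {"v32i8", 1};  // one ymm
  if (NumElts == 64) return {"v64i8", 1};  // one zmm; 2 x v32i8 without 512-bit regs
  // Wide or non-power-of-2 masks break into one i8 per element,
  // matching avx2 behavior.
  return {"i8", NumElts};
}

int main() {
  for (unsigned N : {2u, 4u, 8u, 16u, 32u, 64u, 7u}) {
    Mapping M = mapMaskForCC(N);
    std::printf("v%ui1 -> %u x %s\n", N, M.NumRegisters, M.RegisterVT);
  }
  return 0;
}

Under those assumptions a v32i1 argument, for example, travels as a single v32i8 in a ymm register instead of being promoted by CCPromoteToType in the .td file.
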
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 96b5e2cfbd82..d5de94aeb8a2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
   return TargetLoweringBase::getPreferredVectorAction(VT);
 }
 
+static std::pair<MVT, unsigned>
+handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
+                                 const X86Subtarget &Subtarget) {
+  // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
+  // convention is one that uses k registers.
+  if (NumElts == 2)
+    return {MVT::v2i64, 1};
+  if (NumElts == 4)
+    return {MVT::v4i32, 1};
+  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
+      CC != CallingConv::Intel_OCL_BI)
+    return {MVT::v8i16, 1};
+  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
+      CC != CallingConv::Intel_OCL_BI)
+    return {MVT::v16i8, 1};
+  // v32i1 passes in ymm unless we have BWI and the calling convention is
+  // regcall.
+  if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
+    return {MVT::v32i8, 1};
+  // Split v64i1 vectors if we don't have v64i8 available.
+  if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
+    if (Subtarget.useAVX512Regs())
+      return {MVT::v64i8, 1};
+    return {MVT::v32i8, 2};
+  }
+
+  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
+      NumElts > 64)
+    return {MVT::i8, NumElts};
+
+  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
+}
+
 MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                      CallingConv::ID CC,
                                                      EVT VT) const {
-  // v32i1 vectors should be promoted to v32i8 to match avx2.
-  if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
-    return MVT::v32i8;
-  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
-      Subtarget.hasAVX512() &&
-      (!isPowerOf2_32(VT.getVectorNumElements()) ||
-       (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
-       (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
-    return MVT::i8;
-  // Split v64i1 vectors if we don't have v64i8 available.
-  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
-      CC != CallingConv::X86_RegCall)
-    return MVT::v32i1;
+      Subtarget.hasAVX512()) {
+    unsigned NumElts = VT.getVectorNumElements();
+
+    MVT RegisterVT;
+    unsigned NumRegisters;
+    std::tie(RegisterVT, NumRegisters) =
+        handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+    if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+      return RegisterVT;
+  }
+
   // FIXME: Should we just make these types legal and custom split operations?
   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
       Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
     return MVT::v16i32;
+
   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
 }
 
 unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                           CallingConv::ID CC,
                                                           EVT VT) const {
-  // v32i1 vectors should be promoted to v32i8 to match avx2.
-  if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
-    return 1;
-  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
-      Subtarget.hasAVX512() &&
-      (!isPowerOf2_32(VT.getVectorNumElements()) ||
-       (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
-       (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
-    return VT.getVectorNumElements();
-  // Split v64i1 vectors if we don't have v64i8 available.
-  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
-      CC != CallingConv::X86_RegCall)
-    return 2;
+      Subtarget.hasAVX512()) {
+    unsigned NumElts = VT.getVectorNumElements();
+
+    MVT RegisterVT;
+    unsigned NumRegisters;
+    std::tie(RegisterVT, NumRegisters) =
+        handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+    if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+      return NumRegisters;
+  }
+
   // FIXME: Should we just make these types legal and custom split operations?
   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
       Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
     return 1;
+
   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
 }
 
@@ -2140,8 +2172,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
       Subtarget.hasAVX512() &&
       (!isPowerOf2_32(VT.getVectorNumElements()) ||
-       (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
-       (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
+       (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
+       VT.getVectorNumElements() > 64)) {
     RegisterVT = MVT::i8;
     IntermediateVT = MVT::i1;
     NumIntermediates = VT.getVectorNumElements();
@@ -2151,7 +2183,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
   // Split v64i1 vectors if we don't have v64i8 available.
   if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
       CC != CallingConv::X86_RegCall) {
-    RegisterVT = MVT::v32i1;
+    RegisterVT = MVT::v32i8;
     IntermediateVT = MVT::v32i1;
     NumIntermediates = 2;
     return 2;
-- 
2.17.1
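[Editor's note] The final hunk above also corrects the split-v64i1 breakdown: the value is still split into two v32i1 intermediates, but each half is now reported as being passed as v32i8 (the type it actually occupies in a register) rather than v32i1. Here is a small hedged sketch of the corrected result; the BreakdownInfo struct and breakdownV64i1 function are invented names, since LLVM returns these values through out-parameters of getVectorTypeBreakdownForCallingConv.

#include <cstdio>

// Invented for illustration; not an LLVM API.
struct BreakdownInfo {
  const char *IntermediateVT; // type the value is split into
  const char *RegisterVT;     // type each piece is passed as
  unsigned NumIntermediates;  // how many pieces
};

// Corrected breakdown for v64i1, assuming hasBWI(), !useAVX512Regs(),
// and a calling convention other than X86_RegCall.
static BreakdownInfo breakdownV64i1() {
  // Before the patch, RegisterVT was reported as v32i1, which
  // disagreed with how the halves are actually passed.
  return {"v32i1", "v32i8", 2};
}

int main() {
  BreakdownInfo B = breakdownV64i1();
  std::printf("v64i1 -> %u x %s, each passed as %s\n",
              B.NumIntermediates, B.IntermediateVT, B.RegisterVT);
  return 0;
}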