summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNaveen Saini <naveen.kumar.saini@intel.com>2021-08-27 15:28:28 +0800
committerAnuj Mittal <anuj.mittal@intel.com>2021-08-27 23:28:04 +0800
commit23d702e38f4e9759651a5121188f7853e97989aa (patch)
tree406f67f5b111e6fc5f42bbba361ec67d6b60b9c7
parente69b38c4b7db4dd25204f6d4e6bb576c57aa3e6e (diff)
downloadmeta-intel-23d702e38f4e9759651a5121188f7853e97989aa.tar.gz
llvm/10.0.0: apply ispc recommended patches
ISPC recommends building LLVM 10 with some additional patches to work around some bugs in this version. Add those patches to our build as well. https://github.com/ispc/ispc/tree/v1.16.1/llvm_patches Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch96
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch105
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch43
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch34
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch40
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch61
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch97
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch173
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend8
9 files changed, 657 insertions, 0 deletions
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch
new file mode 100644
index 00000000..09be8202
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch
@@ -0,0 +1,96 @@
1From 294ca2fd69a077b35acec9d498120d6cb0324dae Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 11:53:27 +0800
4Subject: [PATCH 1/2] This patch is required to fix the crash referenced in
5 #1767
6
7It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D76994.
8
9Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/41f13f1f64d2074ae7512fb23656c22585e912bd]
10
11Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
12---
13 .../CodeGen/SelectionDAG/LegalizeTypes.cpp | 3 +-
14 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 31 ++++++++++++-------
15 2 files changed, 21 insertions(+), 13 deletions(-)
16
17diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
18index 63ddb59fce68..822da2183269 100644
19--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
20+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
21@@ -173,7 +173,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
22 }
23 }
24 }
25-
26+#ifndef NDEBUG
27 // Checked that NewNodes are only used by other NewNodes.
28 for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
29 SDNode *N = NewNodes[i];
30@@ -181,6 +181,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
31 UI != UE; ++UI)
32 assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
33 }
34+#endif
35 }
36
37 /// This is the main entry point for the type legalizer. This does a top-down
38diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
39index faae14444d51..b908c5c58e9f 100644
40--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
41+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
42@@ -155,7 +155,9 @@ private:
43 const SDValue &getSDValue(TableId &Id) {
44 RemapId(Id);
45 assert(Id && "TableId should be non-zero");
46- return IdToValueMap[Id];
47+ auto I = IdToValueMap.find(Id);
48+ assert(I != IdToValueMap.end() && "cannot find Id in map");
49+ return I->second;
50 }
51
52 public:
53@@ -172,24 +174,29 @@ public:
54 bool run();
55
56 void NoteDeletion(SDNode *Old, SDNode *New) {
57+ assert(Old != New && "node replaced with self");
58 for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
59 TableId NewId = getTableId(SDValue(New, i));
60 TableId OldId = getTableId(SDValue(Old, i));
61
62- if (OldId != NewId)
63+ if (OldId != NewId) {
64 ReplacedValues[OldId] = NewId;
65
66- // Delete Node from tables.
67+ // Delete Node from tables. We cannot do this when OldId == NewId,
68+ // because NewId can still have table references to it in
69+ // ReplacedValues.
70+ IdToValueMap.erase(OldId);
71+ PromotedIntegers.erase(OldId);
72+ ExpandedIntegers.erase(OldId);
73+ SoftenedFloats.erase(OldId);
74+ PromotedFloats.erase(OldId);
75+ ExpandedFloats.erase(OldId);
76+ ScalarizedVectors.erase(OldId);
77+ SplitVectors.erase(OldId);
78+ WidenedVectors.erase(OldId);
79+ }
80+
81 ValueToIdMap.erase(SDValue(Old, i));
82- IdToValueMap.erase(OldId);
83- PromotedIntegers.erase(OldId);
84- ExpandedIntegers.erase(OldId);
85- SoftenedFloats.erase(OldId);
86- PromotedFloats.erase(OldId);
87- ExpandedFloats.erase(OldId);
88- ScalarizedVectors.erase(OldId);
89- SplitVectors.erase(OldId);
90- WidenedVectors.erase(OldId);
91 }
92 }
93
94--
952.17.1
96
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch
new file mode 100644
index 00000000..72877d83
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch
@@ -0,0 +1,105 @@
1From d266087e8dba9e8fd4984e1cb85c20376e2c8ea3 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 11:56:01 +0800
4Subject: [PATCH 2/2] This patch is a fix for #1788.
5
6It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D81698
7This also needed part of another llvm 11.0 commit: https://reviews.llvm.org/D72975
8
9Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/aeb50448019ce1b1002f3781f9647d486320d83c]
10
11Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
12---
13 llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++++++---
14 .../InstCombine/InstructionCombining.cpp | 27 +++++++++++++++++--
15 2 files changed, 44 insertions(+), 5 deletions(-)
16
17diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
18index 6621fc9f819c..fb7ad93519f6 100644
19--- a/llvm/include/llvm/IR/PatternMatch.h
20+++ b/llvm/include/llvm/IR/PatternMatch.h
21@@ -152,8 +152,10 @@ inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
22
23 struct apint_match {
24 const APInt *&Res;
25+ bool AllowUndef;
26
27- apint_match(const APInt *&R) : Res(R) {}
28+ apint_match(const APInt *&Res, bool AllowUndef)
29+ : Res(Res), AllowUndef(AllowUndef) {}
30
31 template <typename ITy> bool match(ITy *V) {
32 if (auto *CI = dyn_cast<ConstantInt>(V)) {
33@@ -162,7 +164,8 @@ struct apint_match {
34 }
35 if (V->getType()->isVectorTy())
36 if (const auto *C = dyn_cast<Constant>(V))
37- if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
38+ if (auto *CI = dyn_cast_or_null<ConstantInt>(
39+ C->getSplatValue(AllowUndef))) {
40 Res = &CI->getValue();
41 return true;
42 }
43@@ -192,7 +195,20 @@ struct apfloat_match {
44
45 /// Match a ConstantInt or splatted ConstantVector, binding the
46 /// specified pointer to the contained APInt.
47-inline apint_match m_APInt(const APInt *&Res) { return Res; }
48+inline apint_match m_APInt(const APInt *&Res) {
49+ // Forbid undefs by default to maintain previous behavior.
50+ return apint_match(Res, /* AllowUndef */ false);
51+}
52+
53+/// Match APInt while allowing undefs in splat vector constants.
54+inline apint_match m_APIntAllowUndef(const APInt *&Res) {
55+ return apint_match(Res, /* AllowUndef */ true);
56+}
57+
58+/// Match APInt while forbidding undefs in splat vector constants.
59+inline apint_match m_APIntForbidUndef(const APInt *&Res) {
60+ return apint_match(Res, /* AllowUndef */ false);
61+}
62
63 /// Match a ConstantFP or splatted ConstantVector, binding the
64 /// specified pointer to the contained APFloat.
65diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
66index bf32996d96e2..40a246b9d7a7 100644
67--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
68+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
69@@ -925,8 +925,31 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
70 if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
71 if (CI->hasOneUse()) {
72 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
73- if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
74- (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
75+
76+ // FIXME: This is a hack to avoid infinite looping with min/max patterns.
77+ // We have to ensure that vector constants that only differ with
78+ // undef elements are treated as equivalent.
79+ auto areLooselyEqual = [](Value *A, Value *B) {
80+ if (A == B)
81+ return true;
82+
83+ // Test for vector constants.
84+ Constant *ConstA, *ConstB;
85+ if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
86+ return false;
87+
88+ // TODO: Deal with FP constants?
89+ if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
90+ return false;
91+
92+ // Compare for equality including undefs as equal.
93+ auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
94+ const APInt *C;
95+ return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
96+ };
97+
98+ if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
99+ (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
100 return nullptr;
101 }
102 }
103--
1042.17.1
105
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch
new file mode 100644
index 00000000..fc6935a1
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch
@@ -0,0 +1,43 @@
1From 8f83e2b7618da7a98a30839a8f41a6dd82dec468 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 12:00:23 +0800
4Subject: [PATCH 1/2] This patch is required to fix stability problem #1793
5
6It's a backport of the following llvm 11.0 commit: 120c5f1057dc50229f73bc75bbabf4df6ee50fef
7
8Upstream-Status: Backport
9
10Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
11---
12 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++--
13 1 file changed, 4 insertions(+), 2 deletions(-)
14
15diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
16index 2476fd26f250..2743acc89bca 100644
17--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
18+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
19@@ -10702,8 +10702,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
20 SDValue N0 = N->getOperand(0);
21 EVT VT = N->getValueType(0);
22
23+ // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
24 if (N0.isUndef())
25- return DAG.getUNDEF(VT);
26+ return DAG.getConstant(0, SDLoc(N), VT);
27
28 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
29 return Res;
30@@ -10718,8 +10719,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
31 SDValue N0 = N->getOperand(0);
32 EVT VT = N->getValueType(0);
33
34+ // zext_vector_inreg(undef) = 0 because the top bits will be zero.
35 if (N0.isUndef())
36- return DAG.getUNDEF(VT);
37+ return DAG.getConstant(0, SDLoc(N), VT);
38
39 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
40 return Res;
41--
422.17.1
43
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch
new file mode 100644
index 00000000..e3e70107
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch
@@ -0,0 +1,34 @@
1From 62b05a69b4a185cd0b7535f19742686e19fcaf22 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 12:02:37 +0800
4Subject: [PATCH 2/2] Fix for #1844, affects avx512skx-i8x64 and
5 avx512skx-i16x32.
6
7It's a port of 11.0 commit edcfb47ff6d5562e22207f364c65f84302aa346b
8https://reviews.llvm.org/D76312
9
10Upstream-Status: Backport
11
12Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
13---
14 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++-
15 1 file changed, 3 insertions(+), 1 deletion(-)
16
17diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
18index 2743acc89bca..439a8367dabe 100644
19--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
20+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
21@@ -10841,7 +10841,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
22
23 // Attempt to pre-truncate BUILD_VECTOR sources.
24 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
25- TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
26+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
27+ // Avoid creating illegal types if running after type legalizer.
28+ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
29 SDLoc DL(N);
30 EVT SVT = VT.getScalarType();
31 SmallVector<SDValue, 8> TruncOps;
32--
332.17.1
34
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch
new file mode 100644
index 00000000..8aca5fbf
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch
@@ -0,0 +1,40 @@
1From cc4301f82ca1bde1d438c3708de285b0ab8c72d3 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 12:07:25 +0800
4Subject: [PATCH 1/2] [X86] createVariablePermute - handle case where recursive
5 createVariablePermute call fails
6
7Account for the case where a recursive createVariablePermute call with a wider vector type fails.
8
9Original test case from @craig.topper (Craig Topper)
10
11Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/6bdd63dc28208a597542b0c6bc41093f32417804]
12
13Signed-off-by: Simon Pilgrim <llvm-dev@redking.me.uk>
14Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
15---
16 llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++---
17 1 file changed, 5 insertions(+), 3 deletions(-)
18
19diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
20index c8720d9ae3a6..63eb050e9b3a 100644
21--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
22+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
23@@ -9571,9 +9571,11 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
24 IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
25 IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
26 Subtarget, DAG, SDLoc(IndicesVec));
27- return extractSubVector(
28- createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0,
29- DAG, DL, SizeInBits);
30+ SDValue NewSrcVec =
31+ createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
32+ if (NewSrcVec)
33+ return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
34+ return SDValue();
35 } else if (SrcVec.getValueSizeInBits() < SizeInBits) {
36 // Widen smaller SrcVec to match VT.
37 SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
38--
392.17.1
40
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch
new file mode 100644
index 00000000..e03c279f
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch
@@ -0,0 +1,61 @@
1From 9cdff0785d5cf9effc8e922d3330311c4d3dda78 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 12:09:42 +0800
4Subject: [PATCH 2/2] This patch is needed for avx512skx-i8x64 and
5 avx512skx-i16x32 targets.
6
7This is a combination of two commits:
8- 0cd6712a7af0fa2702b5d4cc733500eb5e62e7d0 - stability fix.
9- d8ad7cc0885f32104a7cd83c77191aec15fd684f - performance follow up.
10
11Upstream-Status: Backport
12
13Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
14---
15 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 +++++++++++++++++--
16 1 file changed, 21 insertions(+), 2 deletions(-)
17
18diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
19index 439a8367dabe..b1639c7f275d 100644
20--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
21+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
22@@ -18471,6 +18471,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
23
24 // Allow targets to opt-out.
25 EVT VT = Extract->getValueType(0);
26+
27+ // We can only create byte sized loads.
28+ if (!VT.isByteSized())
29+ return SDValue();
30+
31+ unsigned Index = ExtIdx->getZExtValue();
32+ unsigned NumElts = VT.getVectorNumElements();
33+
34+ // If the index is a multiple of the extract element count, we can offset the
35+ // address by the store size multiplied by the subvector index. Otherwise if
36+ // the scalar type is byte sized, we can just use the index multiplied by
37+ // the element size in bytes as the offset.
38+ unsigned Offset;
39+ if (Index % NumElts == 0)
40+ Offset = (Index / NumElts) * VT.getStoreSize();
41+ else if (VT.getScalarType().isByteSized())
42+ Offset = Index * VT.getScalarType().getStoreSize();
43+ else
44+ return SDValue();
45+
46 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
47 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
48 return SDValue();
49@@ -18478,8 +18498,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
50 // The narrow load will be offset from the base address of the old load if
51 // we are extracting from something besides index 0 (little-endian).
52 SDLoc DL(Extract);
53- SDValue BaseAddr = Ld->getOperand(1);
54- unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
55+ SDValue BaseAddr = Ld->getBasePtr();
56
57 // TODO: Use "BaseIndexOffset" to make this more effective.
58 SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
59--
602.17.1
61
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch
new file mode 100644
index 00000000..d1768216
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch
@@ -0,0 +1,97 @@
1From c2ebd328979c081dd2c9fd0e359ed99473731d0e Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 12:13:00 +0800
4Subject: [PATCH 1/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we
5 store zeros in the rest of the byte
6
7We can't store garbage in the unused bits. It is possible that something like zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0.
8
9I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment so I wanted to focus on the correctness issue.
10
11Should fix PR48147.
12
13Reviewed By: RKSimon
14
15Differential Revision: https://reviews.llvm.org/D91294
16
17Upstream-Status: Backport
18
19Signed-off-by: Craig Topper <craig.topper@sifive.com>
20Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
21---
22 llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++------
23 llvm/lib/Target/X86/X86InstrAVX512.td | 2 --
24 2 files changed, 14 insertions(+), 8 deletions(-)
25
26diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
27index 63eb050e9b3a..96b5e2cfbd82 100644
28--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
29+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
30@@ -22688,17 +22688,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
31 // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
32 if (StoredVal.getValueType().isVector() &&
33 StoredVal.getValueType().getVectorElementType() == MVT::i1) {
34- assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
35- "Unexpected VT");
36+ unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
37+ assert(NumElts <= 8 && "Unexpected VT");
38 assert(!St->isTruncatingStore() && "Expected non-truncating store");
39 assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
40 "Expected AVX512F without AVX512DQI");
41
42+ // We must pad with zeros to ensure we store zeroes to any unused bits.
43 StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
44 DAG.getUNDEF(MVT::v16i1), StoredVal,
45 DAG.getIntPtrConstant(0, dl));
46 StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
47 StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
48+ // Make sure we store zeros in the extra bits.
49+ if (NumElts < 8)
50+ StoredVal = DAG.getZeroExtendInReg(StoredVal, dl,
51+ MVT::getIntegerVT(NumElts));
52
53 return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
54 St->getPointerInfo(), St->getAlignment(),
55@@ -41585,8 +41590,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
56
57 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
58 StoredVal = DAG.getBitcast(NewVT, StoredVal);
59-
60- return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
61+ SDValue Val = StoredVal.getOperand(0);
62+ // We must store zeros to the unused bits.
63+ Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
64+ return DAG.getStore(St->getChain(), dl, Val, St->getBasePtr(),
65 St->getPointerInfo(), St->getAlignment(),
66 St->getMemOperand()->getFlags());
67 }
68@@ -41602,10 +41609,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
69 }
70
71 // Widen v2i1/v4i1 stores to v8i1.
72- if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
73+ if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
74 Subtarget.hasAVX512()) {
75 unsigned NumConcats = 8 / VT.getVectorNumElements();
76- SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
77+ // We must store zeros to the unused bits.
78+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
79 Ops[0] = StoredVal;
80 StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
81 return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
82diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
83index 32f012033fb0..d3b92183f87b 100644
84--- a/llvm/lib/Target/X86/X86InstrAVX512.td
85+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
86@@ -2888,8 +2888,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
87
88 // Load/store kreg
89 let Predicates = [HasDQI] in {
90- def : Pat<(store VK1:$src, addr:$dst),
91- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
92
93 def : Pat<(v1i1 (load addr:$src)),
94 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
95--
962.17.1
97
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
new file mode 100644
index 00000000..03b40e9b
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
@@ -0,0 +1,173 @@
1From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 12:15:09 +0800
4Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via
5 getRegisterTypeForCallingConv rather than using CCPromoteToType in the td
6 file
7
8 Previously we tried to promote these to xmm/ymm/zmm by promoting
9 in the X86CallingConv.td file. But this breaks when we run out
10 of xmm/ymm/zmm registers and need to fall back to memory. We end
11 up trying to create a nonsensical scalar to vector. This led
12 to an assertion. The new tests in avx512-calling-conv.ll all
13 trigger this assertion.
14
15 Since we really want to treat these types like we do on avx2,
16 it seems better to promote them before the calling convention
17 code gets involved. Except when the calling convention is one
18 that passes the vXi1 type in a k register.
19
20 The changes in avx512-regcall-Mask.ll are because we indicated
21 that xmm/ymm/zmm types should be passed indirectly for the
22 Win64 ABI before we go to the common lines that promoted the
23 vXi1 types. This caused the promoted types to be picked up by
24 the default calling convention code. Now we promote them earlier
25 so they get passed indirectly as though they were xmm/ymm/zmm.
26
27 Differential Revision: https://reviews.llvm.org/D75154
28
29Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/eadea7868f5b7542ee6bdcd9a975697a0c919ffc]
30
31Signed-off-by: Craig Topper <craig.topper@intel.com>
32Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
33---
34 llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++--------
35 1 file changed, 61 insertions(+), 29 deletions(-)
36
37diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
38index 96b5e2cfbd82..d5de94aeb8a2 100644
39--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
40+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
41@@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
42 return TargetLoweringBase::getPreferredVectorAction(VT);
43 }
44
45+static std::pair<MVT, unsigned>
46+handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
47+ const X86Subtarget &Subtarget) {
48+ // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
49+ // convention is one that uses k registers.
50+ if (NumElts == 2)
51+ return {MVT::v2i64, 1};
52+ if (NumElts == 4)
53+ return {MVT::v4i32, 1};
54+ if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
55+ CC != CallingConv::Intel_OCL_BI)
56+ return {MVT::v8i16, 1};
57+ if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
58+ CC != CallingConv::Intel_OCL_BI)
59+ return {MVT::v16i8, 1};
60+ // v32i1 passes in ymm unless we have BWI and the calling convention is
61+ // regcall.
62+ if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
63+ return {MVT::v32i8, 1};
64+ // Split v64i1 vectors if we don't have v64i8 available.
65+ if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
66+ if (Subtarget.useAVX512Regs())
67+ return {MVT::v64i8, 1};
68+ return {MVT::v32i8, 2};
69+ }
70+
71+ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
72+ if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
73+ NumElts > 64)
74+ return {MVT::i8, NumElts};
75+
76+ return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
77+}
78+
79 MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
80 CallingConv::ID CC,
81 EVT VT) const {
82- // v32i1 vectors should be promoted to v32i8 to match avx2.
83- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
84- return MVT::v32i8;
85- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
86 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
87- Subtarget.hasAVX512() &&
88- (!isPowerOf2_32(VT.getVectorNumElements()) ||
89- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
90- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
91- return MVT::i8;
92- // Split v64i1 vectors if we don't have v64i8 available.
93- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
94- CC != CallingConv::X86_RegCall)
95- return MVT::v32i1;
96+ Subtarget.hasAVX512()) {
97+ unsigned NumElts = VT.getVectorNumElements();
98+
99+ MVT RegisterVT;
100+ unsigned NumRegisters;
101+ std::tie(RegisterVT, NumRegisters) =
102+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
103+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
104+ return RegisterVT;
105+ }
106+
107 // FIXME: Should we just make these types legal and custom split operations?
108 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
109 Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
110 return MVT::v16i32;
111+
112 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
113 }
114
115 unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
116 CallingConv::ID CC,
117 EVT VT) const {
118- // v32i1 vectors should be promoted to v32i8 to match avx2.
119- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
120- return 1;
121- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
122 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
123- Subtarget.hasAVX512() &&
124- (!isPowerOf2_32(VT.getVectorNumElements()) ||
125- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
126- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
127- return VT.getVectorNumElements();
128- // Split v64i1 vectors if we don't have v64i8 available.
129- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
130- CC != CallingConv::X86_RegCall)
131- return 2;
132+ Subtarget.hasAVX512()) {
133+ unsigned NumElts = VT.getVectorNumElements();
134+
135+ MVT RegisterVT;
136+ unsigned NumRegisters;
137+ std::tie(RegisterVT, NumRegisters) =
138+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
139+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
140+ return NumRegisters;
141+ }
142+
143 // FIXME: Should we just make these types legal and custom split operations?
144 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
145 Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
146 return 1;
147+
148 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
149 }
150
151@@ -2140,8 +2172,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
152 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
153 Subtarget.hasAVX512() &&
154 (!isPowerOf2_32(VT.getVectorNumElements()) ||
155- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
156- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
157+ (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
158+ VT.getVectorNumElements() > 64)) {
159 RegisterVT = MVT::i8;
160 IntermediateVT = MVT::i1;
161 NumIntermediates = VT.getVectorNumElements();
162@@ -2151,7 +2183,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
163 // Split v64i1 vectors if we don't have v64i8 available.
164 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
165 CC != CallingConv::X86_RegCall) {
166- RegisterVT = MVT::v32i1;
167+ RegisterVT = MVT::v32i8;
168 IntermediateVT = MVT::v32i1;
169 NumIntermediates = 2;
170 return 2;
171--
1722.17.1
173
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
index b144411d..3f304215 100644
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
@@ -18,6 +18,14 @@ SRC_URI_LLVM10_PATCHES = " \
18 file://llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \ 18 file://llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \
19 file://llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \ 19 file://llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \
20 file://llvm10-0007-support-cl_ext_float_atomics.patch \ 20 file://llvm10-0007-support-cl_ext_float_atomics.patch \
21 file://llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch \
22 file://llvm10-0009-ispc-10_0_fix_for_1788.patch \
23 file://llvm10-0010-ispc-10_0_fix_for_1793.patch \
24 file://llvm10-0011-ispc-10_0_fix_for_1844.patch \
25 file://llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch \
26 file://llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch \
27 file://llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch \
28 file://llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch \
21 " 29 "
22 30
23SRC_URI_LLVM11_PATCHES = " \ 31SRC_URI_LLVM11_PATCHES = " \