llvm/10.0.0: apply ispc recommended patches

ISPC recommends building LLVM 10 with some additional patches to work around some bugs in this version. Add those patches to our build as well. https://github.com/ispc/ispc/tree/v1.16.1/llvm_patches Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
author: Naveen Saini <naveen.kumar.saini@intel.com> 2021-08-27 15:28:28 +0800
committer: Anuj Mittal <anuj.mittal@intel.com> 2021-08-27 23:28:04 +0800
commit: 23d702e38f4e9759651a5121188f7853e97989aa (patch)
tree: 406f67f5b111e6fc5f42bbba361ec67d6b60b9c7
parent: e69b38c4b7db4dd25204f6d4e6bb576c57aa3e6e (diff)
download: meta-intel-23d702e38f4e9759651a5121188f7853e97989aa.tar.gz
9 files changed, 657 insertions, 0 deletions
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch
new file mode 100644
index 00000000..09be8202
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch
@@ -0,0 +1,96 @@
+From 294ca2fd69a077b35acec9d498120d6cb0324dae Mon Sep 17 00:00:00 2001
+From: Naveen Saini <naveen.kumar.saini@intel.com>
+Date: Fri, 27 Aug 2021 11:53:27 +0800
+Subject: [PATCH 1/2] This patch is required to fix the crash referenced in
+ #1767
+It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D76994.
+Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/41f13f1f64d2074ae7512fb23656c22585e912bd]
+Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
+---
+ .../CodeGen/SelectionDAG/LegalizeTypes.cpp    |  3 +-
+ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 31 ++++++++++++-------
+ 2 files changed, 21 insertions(+), 13 deletions(-)
+diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+index 63ddb59fce68..822da2183269 100644
+--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+@@ -173,7 +173,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
+       }
+     }
+   }
+-
+#ifndef NDEBUG
+   // Checked that NewNodes are only used by other NewNodes.
+   for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+     SDNode *N = NewNodes[i];
+@@ -181,6 +181,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
+          UI != UE; ++UI)
+       assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+   }
+#endif
+ }
+ 
+ /// This is the main entry point for the type legalizer. This does a top-down
+diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+index faae14444d51..b908c5c58e9f 100644
+--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+@@ -155,7 +155,9 @@ private:
+   const SDValue &getSDValue(TableId &Id) {
+     RemapId(Id);
+     assert(Id && "TableId should be non-zero");
+-    return IdToValueMap[Id];
+    auto I = IdToValueMap.find(Id);
+    assert(I != IdToValueMap.end() && "cannot find Id in map");
+    return I->second;
+   }
+ 
+ public:
+@@ -172,24 +174,29 @@ public:
+   bool run();
+ 
+   void NoteDeletion(SDNode *Old, SDNode *New) {
+    assert(Old != New && "node replaced with self");
+     for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
+       TableId NewId = getTableId(SDValue(New, i));
+       TableId OldId = getTableId(SDValue(Old, i));
+ 
+-      if (OldId != NewId)
+      if (OldId != NewId) {
+         ReplacedValues[OldId] = NewId;
+ 
+-      // Delete Node from tables.
+        // Delete Node from tables.  We cannot do this when OldId == NewId,
+        // because NewId can still have table references to it in
+        // ReplacedValues.
+        IdToValueMap.erase(OldId);
+        PromotedIntegers.erase(OldId);
+        ExpandedIntegers.erase(OldId);
+        SoftenedFloats.erase(OldId);
+        PromotedFloats.erase(OldId);
+        ExpandedFloats.erase(OldId);
+        ScalarizedVectors.erase(OldId);
+        SplitVectors.erase(OldId);
+        WidenedVectors.erase(OldId);
+      }
+
+       ValueToIdMap.erase(SDValue(Old, i));
+-      IdToValueMap.erase(OldId);
+-      PromotedIntegers.erase(OldId);
+-      ExpandedIntegers.erase(OldId);
+-      SoftenedFloats.erase(OldId);
+-      PromotedFloats.erase(OldId);
+-      ExpandedFloats.erase(OldId);
+-      ScalarizedVectors.erase(OldId);
+-      SplitVectors.erase(OldId);
+-      WidenedVectors.erase(OldId);
+     }
+   }
+ 
+-- 
+2.17.1
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch
new file mode 100644
index 00000000..72877d83
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch
@@ -0,0 +1,105 @@
+From d266087e8dba9e8fd4984e1cb85c20376e2c8ea3 Mon Sep 17 00:00:00 2001
+From: Naveen Saini <naveen.kumar.saini@intel.com>
+Date: Fri, 27 Aug 2021 11:56:01 +0800
+Subject: [PATCH 2/2] This patch is a fix for #1788.
+It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D81698
+This also needed part of another llvm 11.0 commit: https://reviews.llvm.org/D72975
+Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/aeb50448019ce1b1002f3781f9647d486320d83c]
+Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
+---
+ llvm/include/llvm/IR/PatternMatch.h           | 22 ++++++++++++---
+ .../InstCombine/InstructionCombining.cpp      | 27 +++++++++++++++++--
+ 2 files changed, 44 insertions(+), 5 deletions(-)
+diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
+index 6621fc9f819c..fb7ad93519f6 100644
+--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
+@@ -152,8 +152,10 @@ inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
+ 
+ struct apint_match {
+   const APInt *&Res;
+  bool AllowUndef;
+ 
+-  apint_match(const APInt *&R) : Res(R) {}
+  apint_match(const APInt *&Res, bool AllowUndef)
+    : Res(Res), AllowUndef(AllowUndef) {}
+ 
+   template <typename ITy> bool match(ITy *V) {
+     if (auto *CI = dyn_cast<ConstantInt>(V)) {
+@@ -162,7 +164,8 @@ struct apint_match {
+     }
+     if (V->getType()->isVectorTy())
+       if (const auto *C = dyn_cast<Constant>(V))
+-        if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
+       if (auto *CI = dyn_cast_or_null<ConstantInt>(
+                C->getSplatValue(AllowUndef))) {
+           Res = &CI->getValue();
+           return true;
+         }
+@@ -192,7 +195,20 @@ struct apfloat_match {
+ 
+ /// Match a ConstantInt or splatted ConstantVector, binding the
+ /// specified pointer to the contained APInt.
+-inline apint_match m_APInt(const APInt *&Res) { return Res; }
+inline apint_match m_APInt(const APInt *&Res) {
+  // Forbid undefs by default to maintain previous behavior.
+  return apint_match(Res, /* AllowUndef */ false);
+}
+
+/// Match APInt while allowing undefs in splat vector constants.
+inline apint_match m_APIntAllowUndef(const APInt *&Res) {
+  return apint_match(Res, /* AllowUndef */ true);
+}
+
+/// Match APInt while forbidding undefs in splat vector constants.
+inline apint_match m_APIntForbidUndef(const APInt *&Res) {
+  return apint_match(Res, /* AllowUndef */ false);
+}
+ 
+ /// Match a ConstantFP or splatted ConstantVector, binding the
+ /// specified pointer to the contained APFloat.
+diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+index bf32996d96e2..40a246b9d7a7 100644
+--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+@@ -925,8 +925,31 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
+   if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
+     if (CI->hasOneUse()) {
+       Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+-      if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+-          (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+
+      // FIXME: This is a hack to avoid infinite looping with min/max patterns.
+      //        We have to ensure that vector constants that only differ with
+      //        undef elements are treated as equivalent.
+      auto areLooselyEqual = [](Value *A, Value *B) {
+        if (A == B)
+          return true;
+
+        // Test for vector constants.
+        Constant *ConstA, *ConstB;
+        if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
+          return false;
+
+        // TODO: Deal with FP constants?
+        if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
+          return false;
+
+        // Compare for equality including undefs as equal.
+        auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
+        const APInt *C;
+        return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
+      };
+
+      if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
+          (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
+         return nullptr;
+     }
+   }
+-- 
+2.17.1
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch
new file mode 100644
index 00000000..fc6935a1
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch
@@ -0,0 +1,43 @@
+From 8f83e2b7618da7a98a30839a8f41a6dd82dec468 Mon Sep 17 00:00:00 2001
+From: Naveen Saini <naveen.kumar.saini@intel.com>
+Date: Fri, 27 Aug 2021 12:00:23 +0800
+Subject: [PATCH 1/2] This patch is required to fix stability problem #1793
+It's a backport of the following llvm 11.0 commit: 120c5f1057dc50229f73bc75bbabf4df6ee50fef
+Upstream-Status: Backport
+Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
+---
+ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+index 2476fd26f250..2743acc89bca 100644
+--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+@@ -10702,8 +10702,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
+   SDValue N0 = N->getOperand(0);
+   EVT VT = N->getValueType(0);
+ 
+  // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
+   if (N0.isUndef())
+-    return DAG.getUNDEF(VT);
+    return DAG.getConstant(0, SDLoc(N), VT);
+ 
+   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+     return Res;
+@@ -10718,8 +10719,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
+   SDValue N0 = N->getOperand(0);
+   EVT VT = N->getValueType(0);
+ 
+  // zext_vector_inreg(undef) = 0 because the top bits will be zero.
+   if (N0.isUndef())
+-    return DAG.getUNDEF(VT);
+    return DAG.getConstant(0, SDLoc(N), VT);
+ 
+   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+     return Res;
+-- 
+2.17.1
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch
new file mode 100644
index 00000000..e3e70107
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch
@@ -0,0 +1,34 @@
+From 62b05a69b4a185cd0b7535f19742686e19fcaf22 Mon Sep 17 00:00:00 2001
+From: Naveen Saini <naveen.kumar.saini@intel.com>
+Date: Fri, 27 Aug 2021 12:02:37 +0800
+Subject: [PATCH 2/2] Fix for #1844, affects avx512skx-i8x64 and
+ avx512skx-i16x32.
+It's a port of 11.0 commit edcfb47ff6d5562e22207f364c65f84302aa346b
+https://reviews.llvm.org/D76312
+Upstream-Status: Backport
+Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
+---
+ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+index 2743acc89bca..439a8367dabe 100644
+--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+@@ -10841,7 +10841,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ 
+   // Attempt to pre-truncate BUILD_VECTOR sources.
+   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
+-      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
+      // Avoid creating illegal types if running after type legalizer.
+      (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
+     SDLoc DL(N);
+     EVT SVT = VT.getScalarType();
+     SmallVector<SDValue, 8> TruncOps;
+-- 
+2.17.1
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch
new file mode 100644
index 00000000..8aca5fbf
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch
@@ -0,0 +1,40 @@
+From cc4301f82ca1bde1d438c3708de285b0ab8c72d3 Mon Sep 17 00:00:00 2001
+From: Naveen Saini <naveen.kumar.saini@intel.com>
+Date: Fri, 27 Aug 2021 12:07:25 +0800
+Subject: [PATCH 1/2] [X86] createVariablePermute - handle case where recursive
+ createVariablePermute call fails
+Account for the case where a recursive createVariablePermute call with a wider vector type fails.
+Original test case from @craig.topper (Craig Topper)
+Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/6bdd63dc28208a597542b0c6bc41093f32417804]
+Signed-off-by: Simon Pilgrim <llvm-dev@redking.me.uk>
+Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
+---
+ llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
+index c8720d9ae3a6..63eb050e9b3a 100644
+--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
+@@ -9571,9 +9571,11 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
+       IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
+       IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
+                                   Subtarget, DAG, SDLoc(IndicesVec));
+-      return extractSubVector(
+-          createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0,
+-          DAG, DL, SizeInBits);
+      SDValue NewSrcVec =
+          createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
+      if (NewSrcVec)
+        return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
+      return SDValue();
+     } else if (SrcVec.getValueSizeInBits() < SizeInBits) {
+       // Widen smaller SrcVec to match VT.
+       SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
+-- 
+2.17.1
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch
new file mode 100644
index 00000000..e03c279f
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch
@@ -0,0 +1,61 @@
+From 9cdff0785d5cf9effc8e922d3330311c4d3dda78 Mon Sep 17 00:00:00 2001
+From: Naveen Saini <naveen.kumar.saini@intel.com>
+Date: Fri, 27 Aug 2021 12:09:42 +0800
+Subject: [PATCH 2/2] This patch is needed for avx512skx-i8x64 and
+ avx512skx-i16x32 targets.
+This is a combination of two commits:
+- 0cd6712a7af0fa2702b5d4cc733500eb5e62e7d0 - stability fix.
+- d8ad7cc0885f32104a7cd83c77191aec15fd684f - performance follow up.
+Upstream-Status: Backport
+Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
+---
+ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 +++++++++++++++++--
+ 1 file changed, 21 insertions(+), 2 deletions(-)
+diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+index 439a8367dabe..b1639c7f275d 100644
+--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+@@ -18471,6 +18471,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
+ 
+   // Allow targets to opt-out.
+   EVT VT = Extract->getValueType(0);
+
+  // We can only create byte sized loads.
+  if (!VT.isByteSized())
+    return SDValue();
+
+  unsigned Index = ExtIdx->getZExtValue();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  // If the index is a multiple of the extract element count, we can offset the
+  // address by the store size multiplied by the subvector index. Otherwise if
+  // the scalar type is byte sized, we can just use the index multiplied by
+  // the element size in bytes as the offset.
+  unsigned Offset;
+  if (Index % NumElts == 0)
+    Offset = (Index / NumElts) * VT.getStoreSize();
+  else if (VT.getScalarType().isByteSized())
+    Offset = Index * VT.getScalarType().getStoreSize();
+  else
+    return SDValue();
+
+   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
+     return SDValue();
+@@ -18478,8 +18498,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
+   // The narrow load will be offset from the base address of the old load if
+   // we are extracting from something besides index 0 (little-endian).
+   SDLoc DL(Extract);
+-  SDValue BaseAddr = Ld->getOperand(1);
+-  unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
+  SDValue BaseAddr = Ld->getBasePtr();
+ 
+   // TODO: Use "BaseIndexOffset" to make this more effective.
+   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+-- 
+2.17.1
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch
new file mode 100644
index 00000000..d1768216
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch
@@ -0,0 +1,97 @@
+From c2ebd328979c081dd2c9fd0e359ed99473731d0e Mon Sep 17 00:00:00 2001
+From: Naveen Saini <naveen.kumar.saini@intel.com>
+Date: Fri, 27 Aug 2021 12:13:00 +0800
+Subject: [PATCH 1/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we
+ store zeros in the rest of the byte
+We can't store garbage in the unused bits. It's possible that something like zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0.
+I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment so I wanted to focus on the correctness issue.
+Should fix PR48147.
+Reviewed By: RKSimon
+Differential Revision: https://reviews.llvm.org/D91294
+Upstream-Status: Backport
+Signed-off-by: Craig Topper <craig.topper@sifive.com>
+Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
+---
+ llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++------
+ llvm/lib/Target/X86/X86InstrAVX512.td   |  2 --
+ 2 files changed, 14 insertions(+), 8 deletions(-)
+diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
+index 63eb050e9b3a..96b5e2cfbd82 100644
+--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
+@@ -22688,17 +22688,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
+   // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
+   if (StoredVal.getValueType().isVector() &&
+       StoredVal.getValueType().getVectorElementType() == MVT::i1) {
+-    assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
+-           "Unexpected VT");
+    unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
+    assert(NumElts <= 8 && "Unexpected VT");
+     assert(!St->isTruncatingStore() && "Expected non-truncating store");
+     assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
+            "Expected AVX512F without AVX512DQI");
+ 
+    // We must pad with zeros to ensure we store zeroes to any unused bits.
+     StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+                             DAG.getUNDEF(MVT::v16i1), StoredVal,
+                             DAG.getIntPtrConstant(0, dl));
+     StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
+     StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
+    // Make sure we store zeros in the extra bits.
+    if (NumElts < 8)
+      StoredVal = DAG.getZeroExtendInReg(StoredVal, dl,
+                                         MVT::getIntegerVT(NumElts));
+ 
+     return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+                         St->getPointerInfo(), St->getAlignment(),
+@@ -41585,8 +41590,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
+ 
+     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
+     StoredVal = DAG.getBitcast(NewVT, StoredVal);
+-
+-    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+    SDValue Val = StoredVal.getOperand(0);
+    // We must store zeros to the unused bits.
+    Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
+    return DAG.getStore(St->getChain(), dl, Val, St->getBasePtr(),
+                         St->getPointerInfo(), St->getAlignment(),
+                         St->getMemOperand()->getFlags());
+   }
+@@ -41602,10 +41609,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
+   }
+ 
+   // Widen v2i1/v4i1 stores to v8i1.
+-  if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
+  if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
+       Subtarget.hasAVX512()) {
+     unsigned NumConcats = 8 / VT.getVectorNumElements();
+-    SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
+    // We must store zeros to the unused bits.
+    SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
+     Ops[0] = StoredVal;
+     StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
+     return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
+index 32f012033fb0..d3b92183f87b 100644
+--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
+@@ -2888,8 +2888,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
+ 
+ // Load/store kreg
+ let Predicates = [HasDQI] in {
+-  def : Pat<(store VK1:$src, addr:$dst),
+-            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
+ 
+   def : Pat<(v1i1 (load addr:$src)),
+             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
+-- 
+2.17.1
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
new file mode 100644
index 00000000..03b40e9b
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
@@ -0,0 +1,173 @@
+From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001
+From: Naveen Saini <naveen.kumar.saini@intel.com>
+Date: Fri, 27 Aug 2021 12:15:09 +0800
+Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via
+ getRegisterTypeForCallingConv rather than using CCPromoteToType in the td
+ file
+    Previously we tried to promote these to xmm/ymm/zmm by promoting
+    in the X86CallingConv.td file. But this breaks when we run out
+    of xmm/ymm/zmm registers and need to fall back to memory. We end
+    up trying to create a nonsensical scalar to vector. This led
+    to an assertion. The new tests in avx512-calling-conv.ll all
+    trigger this assertion.
+    Since we really want to treat these types like we do on avx2,
+    it seems better to promote them before the calling convention
+    code gets involved. Except when the calling convention is one
+    that passes the vXi1 type in a k register.
+    The changes in avx512-regcall-Mask.ll are because we indicated
+    that xmm/ymm/zmm types should be passed indirectly for the
+    Win64 ABI before we go to the common lines that promoted the
+    vXi1 types. This caused the promoted types to be picked up by
+    the default calling convention code. Now we promote them earlier
+    so they get passed indirectly as though they were xmm/ymm/zmm.
+    Differential Revision: https://reviews.llvm.org/D75154
+Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/eadea7868f5b7542ee6bdcd9a975697a0c919ffc]
+Signed-off-by: Craig Topper <craig.topper@intel.com>
+Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
+---
+ llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++--------
+ 1 file changed, 61 insertions(+), 29 deletions(-)
+diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
+index 96b5e2cfbd82..d5de94aeb8a2 100644
+--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
+@@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
+   return TargetLoweringBase::getPreferredVectorAction(VT);
+ }
+ 
+static std::pair<MVT, unsigned>
+handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
+                                 const X86Subtarget &Subtarget) {
+  // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
+  // convention is one that uses k registers.
+  if (NumElts == 2)
+    return {MVT::v2i64, 1};
+  if (NumElts == 4)
+    return {MVT::v4i32, 1};
+  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
+      CC != CallingConv::Intel_OCL_BI)
+    return {MVT::v8i16, 1};
+  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
+      CC != CallingConv::Intel_OCL_BI)
+    return {MVT::v16i8, 1};
+  // v32i1 passes in ymm unless we have BWI and the calling convention is
+  // regcall.
+  if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
+    return {MVT::v32i8, 1};
+  // Split v64i1 vectors if we don't have v64i8 available.
+  if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
+    if (Subtarget.useAVX512Regs())
+      return {MVT::v64i8, 1};
+    return {MVT::v32i8, 2};
+  }
+
+  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
+      NumElts > 64)
+    return {MVT::i8, NumElts};
+
+  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
+}
+
+ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+                                                      CallingConv::ID CC,
+                                                      EVT VT) const {
+-  // v32i1 vectors should be promoted to v32i8 to match avx2.
+-  if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
+-    return MVT::v32i8;
+-  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+-      Subtarget.hasAVX512() &&
+-      (!isPowerOf2_32(VT.getVectorNumElements()) ||
+-       (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
+-       (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
+-    return MVT::i8;
+-  // Split v64i1 vectors if we don't have v64i8 available.
+-  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+-      CC != CallingConv::X86_RegCall)
+-    return MVT::v32i1;
+      Subtarget.hasAVX512()) {
+    unsigned NumElts = VT.getVectorNumElements();
+
+    MVT RegisterVT;
+    unsigned NumRegisters;
+    std::tie(RegisterVT, NumRegisters) =
+        handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+    if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+      return RegisterVT;
+  }
+
+   // FIXME: Should we just make these types legal and custom split operations?
+   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
+       Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
+     return MVT::v16i32;
+
+   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+ }
+ 
+ unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+                                                           CallingConv::ID CC,
+                                                           EVT VT) const {
+-  // v32i1 vectors should be promoted to v32i8 to match avx2.
+-  if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
+-    return 1;
+-  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+-      Subtarget.hasAVX512() &&
+-      (!isPowerOf2_32(VT.getVectorNumElements()) ||
+-       (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
+-       (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
+-    return VT.getVectorNumElements();
+-  // Split v64i1 vectors if we don't have v64i8 available.
+-  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+-      CC != CallingConv::X86_RegCall)
+-    return 2;
+      Subtarget.hasAVX512()) {
+    unsigned NumElts = VT.getVectorNumElements();
+
+    MVT RegisterVT;
+    unsigned NumRegisters;
+    std::tie(RegisterVT, NumRegisters) =
+        handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+    if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+      return NumRegisters;
+  }
+
+   // FIXME: Should we just make these types legal and custom split operations?
+   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
+       Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
+     return 1;
+
+   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+ }
+ 
+@@ -2140,8 +2172,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
+   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+       Subtarget.hasAVX512() &&
+       (!isPowerOf2_32(VT.getVectorNumElements()) ||
+-       (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
+-       (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
+       (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
+       VT.getVectorNumElements() > 64)) {
+     RegisterVT = MVT::i8;
+     IntermediateVT = MVT::i1;
+     NumIntermediates = VT.getVectorNumElements();
+@@ -2151,7 +2183,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
+   // Split v64i1 vectors if we don't have v64i8 available.
+   if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+       CC != CallingConv::X86_RegCall) {
+-    RegisterVT = MVT::v32i1;
+    RegisterVT = MVT::v32i8;
+     IntermediateVT = MVT::v32i1;
+     NumIntermediates = 2;
+     return 2;
+-- 
+2.17.1
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
index b144411d..3f304215 100644
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
@@ -18,6 +18,14 @@ SRC_URI_LLVM10_PATCHES = " \
                   file://llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \
                   file://llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \
                   file://llvm10-0007-support-cl_ext_float_atomics.patch \
+                   file://llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch \
+                   file://llvm10-0009-ispc-10_0_fix_for_1788.patch \
+                   file://llvm10-0010-ispc-10_0_fix_for_1793.patch \
+                   file://llvm10-0011-ispc-10_0_fix_for_1844.patch \
+                   file://llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch \
+                   file://llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch \
+                   file://llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch \
+                   file://llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch \
                   "
 SRC_URI_LLVM11_PATCHES = " \
author	Naveen Saini <naveen.kumar.saini@intel.com>	2021-08-27 15:28:28 +0800
committer	Anuj Mittal <anuj.mittal@intel.com>	2021-08-27 23:28:04 +0800
commit	23d702e38f4e9759651a5121188f7853e97989aa (patch)
tree	406f67f5b111e6fc5f42bbba361ec67d6b60b9c7
parent	e69b38c4b7db4dd25204f6d4e6bb576c57aa3e6e (diff)
download	meta-intel-23d702e38f4e9759651a5121188f7853e97989aa.tar.gz