From 23d702e38f4e9759651a5121188f7853e97989aa Mon Sep 17 00:00:00 2001 From: Naveen Saini Date: Fri, 27 Aug 2021 15:28:28 +0800 Subject: llvm/10.0.0: apply ispc recommended patches ISPC recommends building LLVM 10 with some additional patches to work around some bugs in this version. Add those patches to our build as well. https://github.com/ispc/ispc/tree/v1.16.1/llvm_patches Signed-off-by: Naveen Saini Signed-off-by: Anuj Mittal --- .../llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch | 96 ++++++++++++ .../files/llvm10-0009-ispc-10_0_fix_for_1788.patch | 105 +++++++++++++ .../files/llvm10-0010-ispc-10_0_fix_for_1793.patch | 43 +++++ .../files/llvm10-0011-ispc-10_0_fix_for_1844.patch | 34 ++++ ...0-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch | 40 +++++ ...10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch | 61 ++++++++ ...014-ispc-10_0_packed_load_store_avx512skx.patch | 97 ++++++++++++ ...-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch | 173 +++++++++++++++++++++ .../clang/llvm-project-source.bbappend | 8 + 9 files changed, 657 insertions(+) create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch create mode 100644 dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch create mode 100644 
dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch new file mode 100644 index 00000000..09be8202 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch @@ -0,0 +1,96 @@ +From 294ca2fd69a077b35acec9d498120d6cb0324dae Mon Sep 17 00:00:00 2001 +From: Naveen Saini +Date: Fri, 27 Aug 2021 11:53:27 +0800 +Subject: [PATCH 1/2] This patch is required to fix the crash referenced to in + #1767 + +It is a port of the following llvm 11.0 commit : https://reviews.llvm.org/D76994. + +Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/41f13f1f64d2074ae7512fb23656c22585e912bd] + +Signed-off-by: Naveen Saini +--- + .../CodeGen/SelectionDAG/LegalizeTypes.cpp | 3 +- + llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 31 ++++++++++++------- + 2 files changed, 21 insertions(+), 13 deletions(-) + +diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +index 63ddb59fce68..822da2183269 100644 +--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp ++++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +@@ -173,7 +173,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { + } + } + } +- ++#ifndef NDEBUG + // Checked that NewNodes are only used by other NewNodes. + for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) { + SDNode *N = NewNodes[i]; +@@ -181,6 +181,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { + UI != UE; ++UI) + assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!"); + } ++#endif + } + + /// This is the main entry point for the type legalizer. 
This does a top-down +diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +index faae14444d51..b908c5c58e9f 100644 +--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h ++++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +@@ -155,7 +155,9 @@ private: + const SDValue &getSDValue(TableId &Id) { + RemapId(Id); + assert(Id && "TableId should be non-zero"); +- return IdToValueMap[Id]; ++ auto I = IdToValueMap.find(Id); ++ assert(I != IdToValueMap.end() && "cannot find Id in map"); ++ return I->second; + } + + public: +@@ -172,24 +174,29 @@ public: + bool run(); + + void NoteDeletion(SDNode *Old, SDNode *New) { ++ assert(Old != New && "node replaced with self"); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { + TableId NewId = getTableId(SDValue(New, i)); + TableId OldId = getTableId(SDValue(Old, i)); + +- if (OldId != NewId) ++ if (OldId != NewId) { + ReplacedValues[OldId] = NewId; + +- // Delete Node from tables. ++ // Delete Node from tables. We cannot do this when OldId == NewId, ++ // because NewId can still have table references to it in ++ // ReplacedValues. 
++ IdToValueMap.erase(OldId); ++ PromotedIntegers.erase(OldId); ++ ExpandedIntegers.erase(OldId); ++ SoftenedFloats.erase(OldId); ++ PromotedFloats.erase(OldId); ++ ExpandedFloats.erase(OldId); ++ ScalarizedVectors.erase(OldId); ++ SplitVectors.erase(OldId); ++ WidenedVectors.erase(OldId); ++ } ++ + ValueToIdMap.erase(SDValue(Old, i)); +- IdToValueMap.erase(OldId); +- PromotedIntegers.erase(OldId); +- ExpandedIntegers.erase(OldId); +- SoftenedFloats.erase(OldId); +- PromotedFloats.erase(OldId); +- ExpandedFloats.erase(OldId); +- ScalarizedVectors.erase(OldId); +- SplitVectors.erase(OldId); +- WidenedVectors.erase(OldId); + } + } + +-- +2.17.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch new file mode 100644 index 00000000..72877d83 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch @@ -0,0 +1,105 @@ +From d266087e8dba9e8fd4984e1cb85c20376e2c8ea3 Mon Sep 17 00:00:00 2001 +From: Naveen Saini +Date: Fri, 27 Aug 2021 11:56:01 +0800 +Subject: [PATCH 2/2] This patch is a fix for #1788. 
+ +It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D81698 +This also needed part of another llvm 11.0 commit: https://reviews.llvm.org/D72975 + +Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/aeb50448019ce1b1002f3781f9647d486320d83c] + +Signed-off-by: Naveen Saini +--- + llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++++++--- + .../InstCombine/InstructionCombining.cpp | 27 +++++++++++++++++-- + 2 files changed, 44 insertions(+), 5 deletions(-) + +diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h +index 6621fc9f819c..fb7ad93519f6 100644 +--- a/llvm/include/llvm/IR/PatternMatch.h ++++ b/llvm/include/llvm/IR/PatternMatch.h +@@ -152,8 +152,10 @@ inline match_combine_and m_CombineAnd(const LTy &L, const RTy &R) { + + struct apint_match { + const APInt *&Res; ++ bool AllowUndef; + +- apint_match(const APInt *&R) : Res(R) {} ++ apint_match(const APInt *&Res, bool AllowUndef) ++ : Res(Res), AllowUndef(AllowUndef) {} + + template bool match(ITy *V) { + if (auto *CI = dyn_cast(V)) { +@@ -162,7 +164,8 @@ struct apint_match { + } + if (V->getType()->isVectorTy()) + if (const auto *C = dyn_cast(V)) +- if (auto *CI = dyn_cast_or_null(C->getSplatValue())) { ++ if (auto *CI = dyn_cast_or_null( ++ C->getSplatValue(AllowUndef))) { + Res = &CI->getValue(); + return true; + } +@@ -192,7 +195,20 @@ struct apfloat_match { + + /// Match a ConstantInt or splatted ConstantVector, binding the + /// specified pointer to the contained APInt. +-inline apint_match m_APInt(const APInt *&Res) { return Res; } ++inline apint_match m_APInt(const APInt *&Res) { ++ // Forbid undefs by default to maintain previous behavior. ++ return apint_match(Res, /* AllowUndef */ false); ++} ++ ++/// Match APInt while allowing undefs in splat vector constants. 
++inline apint_match m_APIntAllowUndef(const APInt *&Res) { ++ return apint_match(Res, /* AllowUndef */ true); ++} ++ ++/// Match APInt while forbidding undefs in splat vector constants. ++inline apint_match m_APIntForbidUndef(const APInt *&Res) { ++ return apint_match(Res, /* AllowUndef */ false); ++} + + /// Match a ConstantFP or splatted ConstantVector, binding the + /// specified pointer to the contained APFloat. +diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +index bf32996d96e2..40a246b9d7a7 100644 +--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp ++++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +@@ -925,8 +925,31 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { + if (auto *CI = dyn_cast(SI->getCondition())) { + if (CI->hasOneUse()) { + Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); +- if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || +- (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) ++ ++ // FIXME: This is a hack to avoid infinite looping with min/max patterns. ++ // We have to ensure that vector constants that only differ with ++ // undef elements are treated as equivalent. ++ auto areLooselyEqual = [](Value *A, Value *B) { ++ if (A == B) ++ return true; ++ ++ // Test for vector constants. ++ Constant *ConstA, *ConstB; ++ if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB))) ++ return false; ++ ++ // TODO: Deal with FP constants? ++ if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType()) ++ return false; ++ ++ // Compare for equality including undefs as equal. 
++ auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB); ++ const APInt *C; ++ return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue(); ++ }; ++ ++ if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) || ++ (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1))) + return nullptr; + } + } +-- +2.17.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch new file mode 100644 index 00000000..fc6935a1 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch @@ -0,0 +1,43 @@ +From 8f83e2b7618da7a98a30839a8f41a6dd82dec468 Mon Sep 17 00:00:00 2001 +From: Naveen Saini +Date: Fri, 27 Aug 2021 12:00:23 +0800 +Subject: [PATCH 1/2] This patch is required to fix stability problem #1793 + +It's a backport of the following llvm 11.0 commit: 120c5f1057dc50229f73bc75bbabf4df6ee50fef + +Upstream-Status: Backport + +Signed-off-by: Naveen Saini +--- + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +index 2476fd26f250..2743acc89bca 100644 +--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp ++++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +@@ -10702,8 +10702,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + ++ // sext_vector_inreg(undef) = 0 because the top bit will all be the same. 
+ if (N0.isUndef()) +- return DAG.getUNDEF(VT); ++ return DAG.getConstant(0, SDLoc(N), VT); + + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + return Res; +@@ -10718,8 +10719,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + ++ // zext_vector_inreg(undef) = 0 because the top bits will be zero. + if (N0.isUndef()) +- return DAG.getUNDEF(VT); ++ return DAG.getConstant(0, SDLoc(N), VT); + + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) + return Res; +-- +2.17.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch new file mode 100644 index 00000000..e3e70107 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch @@ -0,0 +1,34 @@ +From 62b05a69b4a185cd0b7535f19742686e19fcaf22 Mon Sep 17 00:00:00 2001 +From: Naveen Saini +Date: Fri, 27 Aug 2021 12:02:37 +0800 +Subject: [PATCH 2/2] Fix for #1844, affects avx512skx-i8x64 and + avx512skx-i16x32. + +It's a port of 11.0 commit edcfb47ff6d5562e22207f364c65f84302aa346b +https://reviews.llvm.org/D76312 + +Upstream-Status: Backport + +Signed-off-by: Naveen Saini +--- + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +index 2743acc89bca..439a8367dabe 100644 +--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp ++++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +@@ -10841,7 +10841,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { + + // Attempt to pre-truncate BUILD_VECTOR sources. 
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations && +- TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) { ++ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) && ++ // Avoid creating illegal types if running after type legalizer. ++ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) { + SDLoc DL(N); + EVT SVT = VT.getScalarType(); + SmallVector TruncOps; +-- +2.17.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch new file mode 100644 index 00000000..8aca5fbf --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch @@ -0,0 +1,40 @@ +From cc4301f82ca1bde1d438c3708de285b0ab8c72d3 Mon Sep 17 00:00:00 2001 +From: Naveen Saini +Date: Fri, 27 Aug 2021 12:07:25 +0800 +Subject: [PATCH 1/2] [X86] createVariablePermute - handle case where recursive + createVariablePermute call fails + +Account for the case where a recursive createVariablePermute call with a wider vector type fails. 
+ +Original test case from @craig.topper (Craig Topper) + +Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/6bdd63dc28208a597542b0c6bc41093f32417804] + +Signed-off-by: Simon Pilgrim +Signed-off-by: Naveen Saini +--- + llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp +index c8720d9ae3a6..63eb050e9b3a 100644 +--- a/llvm/lib/Target/X86/X86ISelLowering.cpp ++++ b/llvm/lib/Target/X86/X86ISelLowering.cpp +@@ -9571,9 +9571,11 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec, + IndicesVT = EVT(VT).changeVectorElementTypeToInteger(); + IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false, + Subtarget, DAG, SDLoc(IndicesVec)); +- return extractSubVector( +- createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0, +- DAG, DL, SizeInBits); ++ SDValue NewSrcVec = ++ createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget); ++ if (NewSrcVec) ++ return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits); ++ return SDValue(); + } else if (SrcVec.getValueSizeInBits() < SizeInBits) { + // Widen smaller SrcVec to match VT. 
+ SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec)); +-- +2.17.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch new file mode 100644 index 00000000..e03c279f --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch @@ -0,0 +1,61 @@ +From 9cdff0785d5cf9effc8e922d3330311c4d3dda78 Mon Sep 17 00:00:00 2001 +From: Naveen Saini +Date: Fri, 27 Aug 2021 12:09:42 +0800 +Subject: [PATCH 2/2] This patch is needed for avx512skx-i8x64 and + avx512skx-i16x32 targets. + +This is combination of two commits: +- 0cd6712a7af0fa2702b5d4cc733500eb5e62e7d0 - stability fix. +- d8ad7cc0885f32104a7cd83c77191aec15fd684f - performance follow up. + +Upstream-Status: Backport + +Signed-off-by: Naveen Saini +--- + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 +++++++++++++++++-- + 1 file changed, 21 insertions(+), 2 deletions(-) + +diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +index 439a8367dabe..b1639c7f275d 100644 +--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp ++++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +@@ -18471,6 +18471,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { + + // Allow targets to opt-out. + EVT VT = Extract->getValueType(0); ++ ++ // We can only create byte sized loads. ++ if (!VT.isByteSized()) ++ return SDValue(); ++ ++ unsigned Index = ExtIdx->getZExtValue(); ++ unsigned NumElts = VT.getVectorNumElements(); ++ ++ // If the index is a multiple of the extract element count, we can offset the ++ // address by the store size multiplied by the subvector index. 
Otherwise if ++ // the scalar type is byte sized, we can just use the index multiplied by ++ // the element size in bytes as the offset. ++ unsigned Offset; ++ if (Index % NumElts == 0) ++ Offset = (Index / NumElts) * VT.getStoreSize(); ++ else if (VT.getScalarType().isByteSized()) ++ Offset = Index * VT.getScalarType().getStoreSize(); ++ else ++ return SDValue(); ++ + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT)) + return SDValue(); +@@ -18478,8 +18498,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { + // The narrow load will be offset from the base address of the old load if + // we are extracting from something besides index 0 (little-endian). + SDLoc DL(Extract); +- SDValue BaseAddr = Ld->getOperand(1); +- unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); ++ SDValue BaseAddr = Ld->getBasePtr(); + + // TODO: Use "BaseIndexOffset" to make this more effective. + SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); +-- +2.17.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch new file mode 100644 index 00000000..d1768216 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch @@ -0,0 +1,97 @@ +From c2ebd328979c081dd2c9fd0e359ed99473731d0e Mon Sep 17 00:00:00 2001 +From: Naveen Saini +Date: Fri, 27 Aug 2021 12:13:00 +0800 +Subject: [PATCH 1/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we + store zeros in the rest of the byte + +We can't store garbage in the unused bits. It possible that something like zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0. 
+ +I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment so I wanted to focus on the correctness issue. + +Should fix PR48147. + +Reviewed By: RKSimon + +Differential Revision: https://reviews.llvm.org/D91294 + +Upstream-Status: Backport + +Signed-off-by: Craig Topper +Signed-off-by: Naveen Saini +--- + llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++------ + llvm/lib/Target/X86/X86InstrAVX512.td | 2 -- + 2 files changed, 14 insertions(+), 8 deletions(-) + +diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp +index 63eb050e9b3a..96b5e2cfbd82 100644 +--- a/llvm/lib/Target/X86/X86ISelLowering.cpp ++++ b/llvm/lib/Target/X86/X86ISelLowering.cpp +@@ -22688,17 +22688,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, + // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores. + if (StoredVal.getValueType().isVector() && + StoredVal.getValueType().getVectorElementType() == MVT::i1) { +- assert(StoredVal.getValueType().getVectorNumElements() <= 8 && +- "Unexpected VT"); ++ unsigned NumElts = StoredVal.getValueType().getVectorNumElements(); ++ assert(NumElts <= 8 && "Unexpected VT"); + assert(!St->isTruncatingStore() && "Expected non-truncating store"); + assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() && + "Expected AVX512F without AVX512DQI"); + ++ // We must pad with zeros to ensure we store zeroes to any unused bits. + StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, + DAG.getUNDEF(MVT::v16i1), StoredVal, + DAG.getIntPtrConstant(0, dl)); + StoredVal = DAG.getBitcast(MVT::i16, StoredVal); + StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal); ++ // Make sure we store zeros in the extra bits. 
++ if (NumElts < 8) ++ StoredVal = DAG.getZeroExtendInReg(StoredVal, dl, ++ MVT::getIntegerVT(NumElts)); + + return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), + St->getPointerInfo(), St->getAlignment(), +@@ -41585,8 +41590,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, + + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements()); + StoredVal = DAG.getBitcast(NewVT, StoredVal); +- +- return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), ++ SDValue Val = StoredVal.getOperand(0); ++ // We must store zeros to the unused bits. ++ Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1); ++ return DAG.getStore(St->getChain(), dl, Val, St->getBasePtr(), + St->getPointerInfo(), St->getAlignment(), + St->getMemOperand()->getFlags()); + } +@@ -41602,10 +41609,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, + } + + // Widen v2i1/v4i1 stores to v8i1. +- if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT && ++ if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT && + Subtarget.hasAVX512()) { + unsigned NumConcats = 8 / VT.getVectorNumElements(); +- SmallVector Ops(NumConcats, DAG.getUNDEF(VT)); ++ // We must store zeros to the unused bits. 
++ SmallVector Ops(NumConcats, DAG.getConstant(0, dl, VT)); + Ops[0] = StoredVal; + StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); + return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), +diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td +index 32f012033fb0..d3b92183f87b 100644 +--- a/llvm/lib/Target/X86/X86InstrAVX512.td ++++ b/llvm/lib/Target/X86/X86InstrAVX512.td +@@ -2888,8 +2888,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), + + // Load/store kreg + let Predicates = [HasDQI] in { +- def : Pat<(store VK1:$src, addr:$dst), +- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>; + + def : Pat<(v1i1 (load addr:$src)), + (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>; +-- +2.17.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch new file mode 100644 index 00000000..03b40e9b --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch @@ -0,0 +1,173 @@ +From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001 +From: Naveen Saini +Date: Fri, 27 Aug 2021 12:15:09 +0800 +Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via + getRegisterTypeForCallingConv rather than using CCPromoteToType in the td + file + + Previously we tried to promote these to xmm/ymm/zmm by promoting + in the X86CallingConv.td file. But this breaks when we run out + of xmm/ymm/zmm registers and need to fall back to memory. We end + up trying to create a non-sensical scalar to vector. This lead + to an assertion. The new tests in avx512-calling-conv.ll all + trigger this assertion. + + Since we really want to treat these types like we do on avx2, + it seems better to promote them before the calling convention + code gets involved. 
Except when the calling convention is one + that passes the vXi1 type in a k register. + + The changes in avx512-regcall-Mask.ll are because we indicated + that xmm/ymm/zmm types should be passed indirectly for the + Win64 ABI before we go to the common lines that promoted the + vXi1 types. This caused the promoted types to be picked up by + the default calling convention code. Now we promote them earlier + so they get passed indirectly as though they were xmm/ymm/zmm. + + Differential Revision: https://reviews.llvm.org/D75154 + +Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/eadea7868f5b7542ee6bdcd9a975697a0c919ffc] + +Signed-off-by:Craig Topper +Signed-off-by: Naveen Saini +--- + llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++-------- + 1 file changed, 61 insertions(+), 29 deletions(-) + +diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp +index 96b5e2cfbd82..d5de94aeb8a2 100644 +--- a/llvm/lib/Target/X86/X86ISelLowering.cpp ++++ b/llvm/lib/Target/X86/X86ISelLowering.cpp +@@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const { + return TargetLoweringBase::getPreferredVectorAction(VT); + } + ++static std::pair ++handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, ++ const X86Subtarget &Subtarget) { ++ // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling ++ // convention is one that uses k registers. ++ if (NumElts == 2) ++ return {MVT::v2i64, 1}; ++ if (NumElts == 4) ++ return {MVT::v4i32, 1}; ++ if (NumElts == 8 && CC != CallingConv::X86_RegCall && ++ CC != CallingConv::Intel_OCL_BI) ++ return {MVT::v8i16, 1}; ++ if (NumElts == 16 && CC != CallingConv::X86_RegCall && ++ CC != CallingConv::Intel_OCL_BI) ++ return {MVT::v16i8, 1}; ++ // v32i1 passes in ymm unless we have BWI and the calling convention is ++ // regcall. 
++ if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall)) ++ return {MVT::v32i8, 1}; ++ // Split v64i1 vectors if we don't have v64i8 available. ++ if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) { ++ if (Subtarget.useAVX512Regs()) ++ return {MVT::v64i8, 1}; ++ return {MVT::v32i8, 2}; ++ } ++ ++ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. ++ if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) || ++ NumElts > 64) ++ return {MVT::i8, NumElts}; ++ ++ return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0}; ++} ++ + MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const { +- // v32i1 vectors should be promoted to v32i8 to match avx2. +- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) +- return MVT::v32i8; +- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. + if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && +- Subtarget.hasAVX512() && +- (!isPowerOf2_32(VT.getVectorNumElements()) || +- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || +- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) +- return MVT::i8; +- // Split v64i1 vectors if we don't have v64i8 available. +- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && +- CC != CallingConv::X86_RegCall) +- return MVT::v32i1; ++ Subtarget.hasAVX512()) { ++ unsigned NumElts = VT.getVectorNumElements(); ++ ++ MVT RegisterVT; ++ unsigned NumRegisters; ++ std::tie(RegisterVT, NumRegisters) = ++ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); ++ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) ++ return RegisterVT; ++ } ++ + // FIXME: Should we just make these types legal and custom split operations? 
+ if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI && + Subtarget.useAVX512Regs() && !Subtarget.hasBWI()) + return MVT::v16i32; ++ + return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); + } + + unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const { +- // v32i1 vectors should be promoted to v32i8 to match avx2. +- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) +- return 1; +- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. + if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && +- Subtarget.hasAVX512() && +- (!isPowerOf2_32(VT.getVectorNumElements()) || +- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || +- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) +- return VT.getVectorNumElements(); +- // Split v64i1 vectors if we don't have v64i8 available. +- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && +- CC != CallingConv::X86_RegCall) +- return 2; ++ Subtarget.hasAVX512()) { ++ unsigned NumElts = VT.getVectorNumElements(); ++ ++ MVT RegisterVT; ++ unsigned NumRegisters; ++ std::tie(RegisterVT, NumRegisters) = ++ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); ++ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) ++ return NumRegisters; ++ } ++ + // FIXME: Should we just make these types legal and custom split operations? 
+ if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI && + Subtarget.useAVX512Regs() && !Subtarget.hasBWI()) + return 1; ++ + return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); + } + +@@ -2140,8 +2172,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( + if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && + Subtarget.hasAVX512() && + (!isPowerOf2_32(VT.getVectorNumElements()) || +- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || +- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) { ++ (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) || ++ VT.getVectorNumElements() > 64)) { + RegisterVT = MVT::i8; + IntermediateVT = MVT::i1; + NumIntermediates = VT.getVectorNumElements(); +@@ -2151,7 +2183,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( + // Split v64i1 vectors if we don't have v64i8 available. + if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && + CC != CallingConv::X86_RegCall) { +- RegisterVT = MVT::v32i1; ++ RegisterVT = MVT::v32i8; + IntermediateVT = MVT::v32i1; + NumIntermediates = 2; + return 2; +-- +2.17.1 + diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend index b144411d..3f304215 100644 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend @@ -18,6 +18,14 @@ SRC_URI_LLVM10_PATCHES = " \ file://llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \ file://llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \ file://llvm10-0007-support-cl_ext_float_atomics.patch \ + file://llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch \ + file://llvm10-0009-ispc-10_0_fix_for_1788.patch \ + file://llvm10-0010-ispc-10_0_fix_for_1793.patch \ + 
file://llvm10-0011-ispc-10_0_fix_for_1844.patch \ + file://llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch \ + file://llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch \ + file://llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch \ + file://llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch \ " SRC_URI_LLVM11_PATCHES = " \ -- cgit v1.2.3-54-g00ecf