2 files changed, 142 insertions, 0 deletions
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0007-ispc-11_0_11_1_disable-A-B-A-B-in-InstCombine.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0007-ispc-11_0_11_1_disable-A-B-A-B-in-InstCombine.patch
new file mode 100644
index 00000000..dcf26bc9
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0007-ispc-11_0_11_1_disable-A-B-A-B-in-InstCombine.patch
@@ -0,0 +1,47 @@
+From ef2b930a8e33078449737a93e7d522b2280ec58c Mon Sep 17 00:00:00 2001
+From: Naveen Saini <naveen.kumar.saini@intel.com>
+Date: Fri, 27 Aug 2021 11:39:16 +0800
+Subject: [PATCH 1/2] This patch is needed for ISPC for Gen only
+Transforming an add into an or (A+B --> A|B) is not safe for the VC backend.
+Upstream-Status: Backport [Taken from ispc,https://github.com/ispc/ispc/blob/v1.16.1/llvm_patches/11_0_11_1_disable-A-B-A-B-in-InstCombine.patch]
+Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
+---
+ .../lib/Transforms/InstCombine/InstCombineAddSub.cpp | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+index a7f5e0a7774d..bf02b0f70827 100644
+--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+@@ -15,6 +15,7 @@
+ #include "llvm/ADT/APInt.h"
+ #include "llvm/ADT/STLExtras.h"
+ #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
+ #include "llvm/Analysis/InstructionSimplify.h"
+ #include "llvm/Analysis/ValueTracking.h"
+ #include "llvm/IR/Constant.h"
+@@ -1324,10 +1325,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
+       return BinaryOperator::CreateSRem(RHS, NewRHS);
+     }
+   }
+-
+-  // A+B --> A|B iff A and B have no bits set in common.
+-  if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
+-    return BinaryOperator::CreateOr(LHS, RHS);
+  
+  // Disable this transformation for ISPC SPIR-V
+  if (!Triple(I.getModule()->getTargetTriple()).isSPIR()) {
+    // A+B --> A|B iff A and B have no bits set in common.
+    if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
+      return BinaryOperator::CreateOr(LHS, RHS);
+  }
+ 
+   // FIXME: We already did a check for ConstantInt RHS above this.
+   // FIXME: Is this pattern covered by another fold? No regression tests fail on
+-- 
+2.17.1
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0008-ispc-11_0_11_1_packed_load_store_avx512.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0008-ispc-11_0_11_1_packed_load_store_avx512.patch
new file mode 100644
index 00000000..9ceed0a9
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0008-ispc-11_0_11_1_packed_load_store_avx512.patch
@@ -0,0 +1,95 @@
+From c20838176e8bea9e5a176c59c78bbce9051ec987 Mon Sep 17 00:00:00 2001
+From: Naveen Saini <naveen.kumar.saini@intel.com>
+Date: Fri, 27 Aug 2021 11:41:47 +0800
+Subject: [PATCH 2/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we
+ store zeros in the rest of the byte
+We can't store garbage in the unused bits. It is possible that something like a zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0.
+I'm not sure that the code in LowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment, so I wanted to focus on the correctness issue.
+Should fix PR48147.
+Reviewed By: RKSimon
+Differential Revision: https://reviews.llvm.org/D91294
+Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/a4124e455e641db1e18d4221d2dacb31953fd13b]
+Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
+---
+ llvm/lib/Target/X86/X86ISelLowering.cpp | 19 ++++++++++++++-----
+ llvm/lib/Target/X86/X86InstrAVX512.td   |  3 ---
+ 2 files changed, 14 insertions(+), 8 deletions(-)
+diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
+index 56690c3c555b..7e673a3163b7 100644
+--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
+@@ -23549,17 +23549,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
+   // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
+   if (StoredVal.getValueType().isVector() &&
+       StoredVal.getValueType().getVectorElementType() == MVT::i1) {
+-    assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
+-           "Unexpected VT");
+    unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
+    assert(NumElts <= 8 && "Unexpected VT");
+     assert(!St->isTruncatingStore() && "Expected non-truncating store");
+     assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
+            "Expected AVX512F without AVX512DQI");
+ 
+    // We must pad with zeros to ensure we store zeroes to any unused bits.
+     StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+                             DAG.getUNDEF(MVT::v16i1), StoredVal,
+                             DAG.getIntPtrConstant(0, dl));
+     StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
+     StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
+    // Make sure we store zeros in the extra bits.
+    if (NumElts < 8)
+      StoredVal = DAG.getZeroExtendInReg(StoredVal, dl,
+                                         MVT::getIntegerVT(NumElts));
+ 
+     return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+                         St->getPointerInfo(), St->getOriginalAlign(),
+@@ -44133,17 +44138,21 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
+   if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
+       StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+       StoredVal.getOperand(0).getValueType() == MVT::i8) {
+-    return DAG.getStore(St->getChain(), dl, StoredVal.getOperand(0),
+    SDValue Val = StoredVal.getOperand(0);
+    // We must store zeros to the unused bits.
+    Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
+    return DAG.getStore(St->getChain(), dl, Val,
+                         St->getBasePtr(), St->getPointerInfo(),
+                         St->getOriginalAlign(),
+                         St->getMemOperand()->getFlags());
+   }
+ 
+   // Widen v2i1/v4i1 stores to v8i1.
+-  if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
+  if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
+       Subtarget.hasAVX512()) {
+     unsigned NumConcats = 8 / VT.getVectorNumElements();
+-    SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
+    // We must store zeros to the unused bits.
+    SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
+     Ops[0] = StoredVal;
+     StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
+     return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
+index a3ad0b1c8dd6..aa1ccec02f2a 100644
+--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
+@@ -2871,9 +2871,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
+ 
+ // Load/store kreg
+ let Predicates = [HasDQI] in {
+-  def : Pat<(store VK1:$src, addr:$dst),
+-            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
+-
+   def : Pat<(v1i1 (load addr:$src)),
+             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
+   def : Pat<(v2i1 (load addr:$src)),
+-- 
+2.17.1

diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0007-ispc-11_0_11_1_disable-A-B-A-B-in-InstCombine.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0007-ispc-11_0_11_1_disable-A-B-A-B-in-InstCombine.patch new file mode 100644 index 00000000..dcf26bc9 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0007-ispc-11_0_11_1_disable-A-B-A-B-in-InstCombine.patch
@@ -0,0 +1,47 @@
	1	From ef2b930a8e33078449737a93e7d522b2280ec58c Mon Sep 17 00:00:00 2001
	2	From: Naveen Saini <naveen.kumar.saini@intel.com>
	3	Date: Fri, 27 Aug 2021 11:39:16 +0800
	4	Subject: [PATCH 1/2] This patch is needed for ISPC for Gen only
	5
	6	Transforming an add into an or (A+B --> A|B) is not safe for the VC backend.
	7
	8	Upstream-Status: Backport [Taken from ispc,https://github.com/ispc/ispc/blob/v1.16.1/llvm_patches/11_0_11_1_disable-A-B-A-B-in-InstCombine.patch]
	9
	10	Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
	11	---
	12	.../lib/Transforms/InstCombine/InstCombineAddSub.cpp \| 12 ++++++++----
	13	1 file changed, 8 insertions(+), 4 deletions(-)
	14
	15	diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
	16	index a7f5e0a7774d..bf02b0f70827 100644
	17	--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
	18	+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
	19	@@ -15,6 +15,7 @@
	20	#include "llvm/ADT/APInt.h"
	21	#include "llvm/ADT/STLExtras.h"
	22	#include "llvm/ADT/SmallVector.h"
	23	+#include "llvm/ADT/Triple.h"
	24	#include "llvm/Analysis/InstructionSimplify.h"
	25	#include "llvm/Analysis/ValueTracking.h"
	26	#include "llvm/IR/Constant.h"
	27	@@ -1324,10 +1325,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
	28	return BinaryOperator::CreateSRem(RHS, NewRHS);
	29	}
	30	}
	31	-
	32	- // A+B --> A\|B iff A and B have no bits set in common.
	33	- if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
	34	- return BinaryOperator::CreateOr(LHS, RHS);
	35	+
	36	+ // Disable this transformation for ISPC SPIR-V
	37	+ if (!Triple(I.getModule()->getTargetTriple()).isSPIR()) {
	38	+ // A+B --> A\|B iff A and B have no bits set in common.
	39	+ if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
	40	+ return BinaryOperator::CreateOr(LHS, RHS);
	41	+ }
	42
	43	// FIXME: We already did a check for ConstantInt RHS above this.
	44	// FIXME: Is this pattern covered by another fold? No regression tests fail on
	45	--
	46	2.17.1
	47


diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0008-ispc-11_0_11_1_packed_load_store_avx512.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0008-ispc-11_0_11_1_packed_load_store_avx512.patch new file mode 100644 index 00000000..9ceed0a9 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0008-ispc-11_0_11_1_packed_load_store_avx512.patch
@@ -0,0 +1,95 @@
	1	From c20838176e8bea9e5a176c59c78bbce9051ec987 Mon Sep 17 00:00:00 2001
	2	From: Naveen Saini <naveen.kumar.saini@intel.com>
	3	Date: Fri, 27 Aug 2021 11:41:47 +0800
	4	Subject: [PATCH 2/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we
	5	store zeros in the rest of the byte
	6
	7	We can't store garbage in the unused bits. It is possible that something like a zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0.
	8
	9	I'm not sure that the code in LowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment, so I wanted to focus on the correctness issue.
	10
	11	Should fix PR48147.
	12
	13	Reviewed By: RKSimon
	14
	15	Differential Revision: https://reviews.llvm.org/D91294
	16
	17	Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/a4124e455e641db1e18d4221d2dacb31953fd13b]
	18
	19	Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
	20	---
	21	llvm/lib/Target/X86/X86ISelLowering.cpp \| 19 ++++++++++++++-----
	22	llvm/lib/Target/X86/X86InstrAVX512.td \| 3 ---
	23	2 files changed, 14 insertions(+), 8 deletions(-)
	24
	25	diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
	26	index 56690c3c555b..7e673a3163b7 100644
	27	--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
	28	+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
	29	@@ -23549,17 +23549,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
	30	// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
	31	if (StoredVal.getValueType().isVector() &&
	32	StoredVal.getValueType().getVectorElementType() == MVT::i1) {
	33	- assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
	34	- "Unexpected VT");
	35	+ unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
	36	+ assert(NumElts <= 8 && "Unexpected VT");
	37	assert(!St->isTruncatingStore() && "Expected non-truncating store");
	38	assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
	39	"Expected AVX512F without AVX512DQI");
	40
	41	+ // We must pad with zeros to ensure we store zeroes to any unused bits.
	42	StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
	43	DAG.getUNDEF(MVT::v16i1), StoredVal,
	44	DAG.getIntPtrConstant(0, dl));
	45	StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
	46	StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
	47	+ // Make sure we store zeros in the extra bits.
	48	+ if (NumElts < 8)
	49	+ StoredVal = DAG.getZeroExtendInReg(StoredVal, dl,
	50	+ MVT::getIntegerVT(NumElts));
	51
	52	return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
	53	St->getPointerInfo(), St->getOriginalAlign(),
	54	@@ -44133,17 +44138,21 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
	55	if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
	56	StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR &&
	57	StoredVal.getOperand(0).getValueType() == MVT::i8) {
	58	- return DAG.getStore(St->getChain(), dl, StoredVal.getOperand(0),
	59	+ SDValue Val = StoredVal.getOperand(0);
	60	+ // We must store zeros to the unused bits.
	61	+ Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
	62	+ return DAG.getStore(St->getChain(), dl, Val,
	63	St->getBasePtr(), St->getPointerInfo(),
	64	St->getOriginalAlign(),
	65	St->getMemOperand()->getFlags());
	66	}
	67
	68	// Widen v2i1/v4i1 stores to v8i1.
	69	- if ((VT == MVT::v2i1 \|\| VT == MVT::v4i1) && VT == StVT &&
	70	+ if ((VT == MVT::v1i1 \|\| VT == MVT::v2i1 \|\| VT == MVT::v4i1) && VT == StVT &&
	71	Subtarget.hasAVX512()) {
	72	unsigned NumConcats = 8 / VT.getVectorNumElements();
	73	- SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
	74	+ // We must store zeros to the unused bits.
	75	+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
	76	Ops[0] = StoredVal;
	77	StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
	78	return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
	79	diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
	80	index a3ad0b1c8dd6..aa1ccec02f2a 100644
	81	--- a/llvm/lib/Target/X86/X86InstrAVX512.td
	82	+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
	83	@@ -2871,9 +2871,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
	84
	85	// Load/store kreg
	86	let Predicates = [HasDQI] in {
	87	- def : Pat<(store VK1:$src, addr:$dst),
	88	- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
	89	-
	90	def : Pat<(v1i1 (load addr:$src)),
	91	(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
	92	def : Pat<(v2i1 (load addr:$src)),
	93	--
	94	2.17.1
	95