diff options
Diffstat (limited to 'dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch')
-rw-r--r-- | dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch | 97 |
1 files changed, 0 insertions, 97 deletions
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch deleted file mode 100644 index d1768216..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch +++ /dev/null | |||
@@ -1,97 +0,0 @@ | |||
1 | From c2ebd328979c081dd2c9fd0e359ed99473731d0e Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Fri, 27 Aug 2021 12:13:00 +0800 | ||
4 | Subject: [PATCH 1/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we | ||
5 | store zeros in the rest of the byte | ||
6 | |||
7 | We can't store garbage in the unused bits. It is possible that something like zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0. | ||
8 | |||
9 | I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment so I wanted to focus on the correctness issue. | ||
10 | |||
11 | Should fix PR48147. | ||
12 | |||
13 | Reviewed By: RKSimon | ||
14 | |||
15 | Differential Revision: https://reviews.llvm.org/D9129 | ||
16 | |||
17 | Upstream-Status: Backport | ||
18 | |||
19 | Signed-off-by: Craig Topper <craig.topper@sifive.com> | ||
20 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
21 | --- | ||
22 | llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++------ | ||
23 | llvm/lib/Target/X86/X86InstrAVX512.td | 2 -- | ||
24 | 2 files changed, 14 insertions(+), 8 deletions(-) | ||
25 | |||
26 | diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
27 | index 63eb050e9b3a..96b5e2cfbd82 100644 | ||
28 | --- a/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
29 | +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
30 | @@ -22688,17 +22688,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, | ||
31 | // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores. | ||
32 | if (StoredVal.getValueType().isVector() && | ||
33 | StoredVal.getValueType().getVectorElementType() == MVT::i1) { | ||
34 | - assert(StoredVal.getValueType().getVectorNumElements() <= 8 && | ||
35 | - "Unexpected VT"); | ||
36 | + unsigned NumElts = StoredVal.getValueType().getVectorNumElements(); | ||
37 | + assert(NumElts <= 8 && "Unexpected VT"); | ||
38 | assert(!St->isTruncatingStore() && "Expected non-truncating store"); | ||
39 | assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() && | ||
40 | "Expected AVX512F without AVX512DQI"); | ||
41 | |||
42 | + // Widen to v16i1 so the mask can be bitcast to i16. The padding lanes are undef here; unused bits are zeroed after the truncate below. | ||
43 | StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, | ||
44 | DAG.getUNDEF(MVT::v16i1), StoredVal, | ||
45 | DAG.getIntPtrConstant(0, dl)); | ||
46 | StoredVal = DAG.getBitcast(MVT::i16, StoredVal); | ||
47 | StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal); | ||
48 | + // Make sure we store zeros in the extra bits. | ||
49 | + if (NumElts < 8) | ||
50 | + StoredVal = DAG.getZeroExtendInReg(StoredVal, dl, | ||
51 | + MVT::getIntegerVT(NumElts)); | ||
52 | |||
53 | return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), | ||
54 | St->getPointerInfo(), St->getAlignment(), | ||
55 | @@ -41585,8 +41590,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, | ||
56 | |||
57 | EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements()); | ||
58 | StoredVal = DAG.getBitcast(NewVT, StoredVal); | ||
59 | - | ||
60 | - return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), | ||
61 | + SDValue Val = StoredVal.getOperand(0); | ||
62 | + // We must store zeros to the unused bits. | ||
63 | + Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1); | ||
64 | + return DAG.getStore(St->getChain(), dl, Val, St->getBasePtr(), | ||
65 | St->getPointerInfo(), St->getAlignment(), | ||
66 | St->getMemOperand()->getFlags()); | ||
67 | } | ||
68 | @@ -41602,10 +41609,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, | ||
69 | } | ||
70 | |||
71 | // Widen v2i1/v4i1 stores to v8i1. | ||
72 | - if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT && | ||
73 | + if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT && | ||
74 | Subtarget.hasAVX512()) { | ||
75 | unsigned NumConcats = 8 / VT.getVectorNumElements(); | ||
76 | - SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT)); | ||
77 | + // We must store zeros to the unused bits. | ||
78 | + SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT)); | ||
79 | Ops[0] = StoredVal; | ||
80 | StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); | ||
81 | return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), | ||
82 | diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td | ||
83 | index 32f012033fb0..d3b92183f87b 100644 | ||
84 | --- a/llvm/lib/Target/X86/X86InstrAVX512.td | ||
85 | +++ b/llvm/lib/Target/X86/X86InstrAVX512.td | ||
86 | @@ -2888,8 +2888,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), | ||
87 | |||
88 | // Load/store kreg | ||
89 | let Predicates = [HasDQI] in { | ||
90 | - def : Pat<(store VK1:$src, addr:$dst), | ||
91 | - (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>; | ||
92 | |||
93 | def : Pat<(v1i1 (load addr:$src)), | ||
94 | (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>; | ||
95 | -- | ||
96 | 2.17.1 | ||
97 | |||