summaryrefslogtreecommitdiffstats
path: root/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0008-ispc-11_0_11_1_packed_load_store_avx512.patch
diff options
context:
space:
mode:
Diffstat (limited to 'dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0008-ispc-11_0_11_1_packed_load_store_avx512.patch')
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0008-ispc-11_0_11_1_packed_load_store_avx512.patch95
1 files changed, 95 insertions, 0 deletions
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0008-ispc-11_0_11_1_packed_load_store_avx512.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0008-ispc-11_0_11_1_packed_load_store_avx512.patch
new file mode 100644
index 00000000..9ceed0a9
--- /dev/null
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm11-0008-ispc-11_0_11_1_packed_load_store_avx512.patch
@@ -0,0 +1,95 @@
1From c20838176e8bea9e5a176c59c78bbce9051ec987 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 11:41:47 +0800
4Subject: [PATCH 2/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we
5 store zeros in the rest of the byte
6
7We can't store garbage in the unused bits. It is possible that something like zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0.
8
9I'm not sure that the code in LowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment so I wanted to focus on the correctness issue.
10
11Should fix PR48147.
12
13Reviewed By: RKSimon
14
15Differential Revision: https://reviews.llvm.org/D91294
16
17Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/a4124e455e641db1e18d4221d2dacb31953fd13b]
18
19Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
20---
21 llvm/lib/Target/X86/X86ISelLowering.cpp | 19 ++++++++++++++-----
22 llvm/lib/Target/X86/X86InstrAVX512.td | 3 ---
23 2 files changed, 14 insertions(+), 8 deletions(-)
24
25diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
26index 56690c3c555b..7e673a3163b7 100644
27--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
28+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
29@@ -23549,17 +23549,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
30 // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
31 if (StoredVal.getValueType().isVector() &&
32 StoredVal.getValueType().getVectorElementType() == MVT::i1) {
33- assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
34- "Unexpected VT");
35+ unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
36+ assert(NumElts <= 8 && "Unexpected VT");
37 assert(!St->isTruncatingStore() && "Expected non-truncating store");
38 assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
39 "Expected AVX512F without AVX512DQI");
40
41+ // We must pad with zeros to ensure we store zeroes to any unused bits.
42 StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
43 DAG.getUNDEF(MVT::v16i1), StoredVal,
44 DAG.getIntPtrConstant(0, dl));
45 StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
46 StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
47+ // Make sure we store zeros in the extra bits.
48+ if (NumElts < 8)
49+ StoredVal = DAG.getZeroExtendInReg(StoredVal, dl,
50+ MVT::getIntegerVT(NumElts));
51
52 return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
53 St->getPointerInfo(), St->getOriginalAlign(),
54@@ -44133,17 +44138,21 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
55 if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
56 StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR &&
57 StoredVal.getOperand(0).getValueType() == MVT::i8) {
58- return DAG.getStore(St->getChain(), dl, StoredVal.getOperand(0),
59+ SDValue Val = StoredVal.getOperand(0);
60+ // We must store zeros to the unused bits.
61+ Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
62+ return DAG.getStore(St->getChain(), dl, Val,
63 St->getBasePtr(), St->getPointerInfo(),
64 St->getOriginalAlign(),
65 St->getMemOperand()->getFlags());
66 }
67
68 // Widen v2i1/v4i1 stores to v8i1.
69- if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
70+ if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
71 Subtarget.hasAVX512()) {
72 unsigned NumConcats = 8 / VT.getVectorNumElements();
73- SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
74+ // We must store zeros to the unused bits.
75+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
76 Ops[0] = StoredVal;
77 StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
78 return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
79diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
80index a3ad0b1c8dd6..aa1ccec02f2a 100644
81--- a/llvm/lib/Target/X86/X86InstrAVX512.td
82+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
83@@ -2871,9 +2871,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
84
85 // Load/store kreg
86 let Predicates = [HasDQI] in {
87- def : Pat<(store VK1:$src, addr:$dst),
88- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
89-
90 def : Pat<(v1i1 (load addr:$src)),
91 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
92 def : Pat<(v2i1 (load addr:$src)),
93--
942.17.1
95