Diffstat (limited to 'dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch')
-rw-r--r-- | dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch | 173
1 file changed, 0 insertions(+), 173 deletions(-)
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
deleted file mode 100644
index 03b40e9b..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
+++ /dev/null
@@ -1,173 +0,0 @@
From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001
From: Naveen Saini <naveen.kumar.saini@intel.com>
Date: Fri, 27 Aug 2021 12:15:09 +0800
Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via
 getRegisterTypeForCallingConv rather than using CCPromoteToType in the td
 file

Previously we tried to promote these to xmm/ymm/zmm by promoting
in the X86CallingConv.td file. But this breaks when we run out
of xmm/ymm/zmm registers and need to fall back to memory. We end
up trying to create a nonsensical scalar-to-vector conversion.
This led to an assertion. The new tests in avx512-calling-conv.ll
all trigger this assertion.
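
To make the failure mode concrete, here is a minimal standalone sketch
(plain C++, not LLVM code; the slot count models the SysV x86-64 ABI, and
the count of v16i1 arguments is a hypothetical, not taken from the new
tests):

#include <cstdio>

int main() {
  // The SysV x86-64 ABI passes the first eight vector arguments in
  // xmm0-xmm7 (or their ymm/zmm aliases); later ones go to memory.
  const int NumVectorArgSlots = 8;
  const int NumMaskArgs = 9; // one more v16i1 argument than fits
  for (int i = 0; i < NumMaskArgs; ++i) {
    if (i < NumVectorArgSlots)
      std::printf("v16i1 arg %d -> xmm%d\n", i, i);
    else
      std::printf("v16i1 arg %d -> memory (the path that asserted)\n", i);
  }
  return 0;
}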
14 | |||
15 | Since we really want to treat these types like we do on avx2, | ||
16 | it seems better to promote them before the calling convention | ||
17 | code gets involved. Except when the calling convention is one | ||
18 | that passes the vXi1 type in a k register. | ||
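
As a reference for the promotion table the patch introduces below, here
is a hedged standalone restatement (plain C++, not the LLVM MVT API;
IsKRegCC collapses the X86_RegCall/Intel_OCL_BI checks into one flag,
which slightly simplifies the real v32i1 condition):

#include <cstdio>
#include <utility>

struct Features { bool HasBWI; bool Use512BitRegs; bool IsKRegCC; };

// Returns {register type, register count}; {nullptr, 0} means the case
// scalarizes or falls through to the default lowering.
static std::pair<const char *, unsigned>
maskPromotion(unsigned NumElts, Features F) {
  if (NumElts == 2) return {"v2i64", 1};   // one xmm
  if (NumElts == 4) return {"v4i32", 1};   // one xmm
  if (NumElts == 8 && !F.IsKRegCC) return {"v8i16", 1};
  if (NumElts == 16 && !F.IsKRegCC) return {"v16i8", 1};
  if (NumElts == 32 && (!F.HasBWI || !F.IsKRegCC))
    return {"v32i8", 1};                   // one ymm
  if (NumElts == 64 && F.HasBWI && !F.IsKRegCC)
    return F.Use512BitRegs ? std::make_pair("v64i8", 1u)  // one zmm
                           : std::make_pair("v32i8", 2u); // two ymm halves
  return {nullptr, 0};
}

int main() {
  Features F{/*HasBWI=*/true, /*Use512BitRegs=*/false, /*IsKRegCC=*/false};
  for (unsigned N : {2u, 4u, 8u, 16u, 32u, 64u}) {
    std::pair<const char *, unsigned> P = maskPromotion(N, F);
    std::printf("v%ui1 -> %u x %s\n", N, P.second, P.first);
  }
  return 0;
}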
19 | |||
20 | The changes in avx512-regcall-Mask.ll are because we indicated | ||
21 | that xmm/ymm/zmm types should be passed indirectly for the | ||
22 | Win64 ABI before we go to the common lines that promoted the | ||
23 | vXi1 types. This caused the promoted types to be picked up by | ||
24 | the default calling convention code. Now we promote them earlier | ||
25 | so they get passed indirectly as though they were xmm/ymm/zmm. | ||
26 | |||
27 | Differential Revision: https://reviews.llvm.org/D75154 | ||
28 | |||
29 | Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/eadea7868f5b7542ee6bdcd9a975697a0c919ffc] | ||
30 | |||
31 | Signed-off-by:Craig Topper <craig.topper@intel.com> | ||
32 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
33 | --- | ||
34 | llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++-------- | ||
35 | 1 file changed, 61 insertions(+), 29 deletions(-) | ||
36 | |||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 96b5e2cfbd82..d5de94aeb8a2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
   return TargetLoweringBase::getPreferredVectorAction(VT);
 }

+static std::pair<MVT, unsigned>
+handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
+                                 const X86Subtarget &Subtarget) {
+  // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
+  // convention is one that uses k registers.
+  if (NumElts == 2)
+    return {MVT::v2i64, 1};
+  if (NumElts == 4)
+    return {MVT::v4i32, 1};
+  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
+      CC != CallingConv::Intel_OCL_BI)
+    return {MVT::v8i16, 1};
+  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
+      CC != CallingConv::Intel_OCL_BI)
+    return {MVT::v16i8, 1};
+  // v32i1 passes in ymm unless we have BWI and the calling convention is
+  // regcall.
+  if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
+    return {MVT::v32i8, 1};
+  // Split v64i1 vectors if we don't have v64i8 available.
+  if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
+    if (Subtarget.useAVX512Regs())
+      return {MVT::v64i8, 1};
+    return {MVT::v32i8, 2};
+  }
+
+  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
+      NumElts > 64)
+    return {MVT::i8, NumElts};
+
+  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
+}
+
 MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                      CallingConv::ID CC,
                                                      EVT VT) const {
-  // v32i1 vectors should be promoted to v32i8 to match avx2.
-  if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
-    return MVT::v32i8;
-  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
-      Subtarget.hasAVX512() &&
-      (!isPowerOf2_32(VT.getVectorNumElements()) ||
-       (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
-       (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
-    return MVT::i8;
-  // Split v64i1 vectors if we don't have v64i8 available.
-  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
-      CC != CallingConv::X86_RegCall)
-    return MVT::v32i1;
+      Subtarget.hasAVX512()) {
+    unsigned NumElts = VT.getVectorNumElements();
+
+    MVT RegisterVT;
+    unsigned NumRegisters;
+    std::tie(RegisterVT, NumRegisters) =
+        handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+    if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+      return RegisterVT;
+  }
+
   // FIXME: Should we just make these types legal and custom split operations?
   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
       Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
     return MVT::v16i32;
+
   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
 }

 unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                           CallingConv::ID CC,
                                                           EVT VT) const {
-  // v32i1 vectors should be promoted to v32i8 to match avx2.
-  if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
-    return 1;
-  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
-      Subtarget.hasAVX512() &&
-      (!isPowerOf2_32(VT.getVectorNumElements()) ||
-       (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
-       (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
-    return VT.getVectorNumElements();
-  // Split v64i1 vectors if we don't have v64i8 available.
-  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
-      CC != CallingConv::X86_RegCall)
-    return 2;
+      Subtarget.hasAVX512()) {
+    unsigned NumElts = VT.getVectorNumElements();
+
+    MVT RegisterVT;
+    unsigned NumRegisters;
+    std::tie(RegisterVT, NumRegisters) =
+        handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+    if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+      return NumRegisters;
+  }
+
   // FIXME: Should we just make these types legal and custom split operations?
   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
       Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
     return 1;
+
   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
 }

@@ -2140,8 +2172,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
       Subtarget.hasAVX512() &&
       (!isPowerOf2_32(VT.getVectorNumElements()) ||
-       (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
-       (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
+       (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
+       VT.getVectorNumElements() > 64)) {
     RegisterVT = MVT::i8;
     IntermediateVT = MVT::i1;
     NumIntermediates = VT.getVectorNumElements();
@@ -2151,7 +2183,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
   // Split v64i1 vectors if we don't have v64i8 available.
   if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
       CC != CallingConv::X86_RegCall) {
-    RegisterVT = MVT::v32i1;
+    RegisterVT = MVT::v32i8;
     IntermediateVT = MVT::v32i1;
     NumIntermediates = 2;
     return 2;
--
2.17.1
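
For reference, a worked example of the v64i1 case the last hunk fixes
(an illustrative model, not the LLVM API): with BWI, 256-bit registers
only, and a non-regcall calling convention, v64i1 splits into two v32i1
halves, and each half is now reported as a v32i8 register instead of the
bogus v32i1.

#include <cstdio>

int main() {
  // Values mirror the fixed getVectorTypeBreakdownForCallingConv hunk.
  const char *RegisterVT = "v32i8";     // was "v32i1" before the fix
  const char *IntermediateVT = "v32i1"; // each half of the v64i1 mask
  unsigned NumIntermediates = 2;
  std::printf("v64i1 -> %u x %s, each passed as %s\n",
              NumIntermediates, IntermediateVT, RegisterVT);
  return 0;
}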