summaryrefslogtreecommitdiffstats
path: root/recipes-extended/dpdk/dpdk/0002-dma-idxd-fix-AVX2-in-non-datapath-functions.patch
blob: 4cd516480f345f8f87448b9fe9e5c930ace92efd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
From aa802b10237c2f7d3b0d0498de9b2fb438f9b9a2 Mon Sep 17 00:00:00 2001
From: Bruce Richardson <bruce.richardson@intel.com>
Date: Fri, 17 Jun 2022 11:59:20 +0100
Subject: [PATCH] dma/idxd: fix AVX2 in non-datapath functions

While all systems which will use the idxd driver for hardware will
support AVX2, if the driver is present the initialization functions e.g.
to register logs, will be called on all systems - irrespective of HW
support. This can cause issues if the system running DPDK does not have
AVX2, and the compiler has included AVX instructions in the
initialization code.

To fix this, remove AVX2 instruction set from the whole build of the
driver. Instead, we add "target(avx2)" attribute to all datapath
functions, so those - and only those functions - will having AVX2
instructions in them.

Bugzilla ID: 1038
Fixes: 3d36a0a1c7de ("dma/idxd: add data path job submission")
Cc: stable@dpdk.org

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Acked-by: Conor Walsh <conor.walsh@intel.com>

Upstream-Status: Backport [http://git.dpdk.org/dpdk/commit/?id=aa802b10237c2f7d3b0d0498de9b2fb438f9b9a2]

Signed-off-by: Yongxin Liu <yongxin.liu@windriver.com>
---
 drivers/dma/idxd/idxd_common.c | 23 +++++++++++++++++++++++
 drivers/dma/idxd/meson.build   |  1 -
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/idxd/idxd_common.c b/drivers/dma/idxd/idxd_common.c
index ea6413cc7a..c77200a457 100644
--- a/drivers/dma/idxd/idxd_common.c
+++ b/drivers/dma/idxd/idxd_common.c
@@ -13,12 +13,23 @@
 
 #define IDXD_PMD_NAME_STR "dmadev_idxd"
 
+/* systems with DSA all support AVX2 so allow our data-path functions to
+ * always use at least that instruction set
+ */
+#ifndef __AVX2__
+#define __use_avx2 __attribute__((target("avx2")))
+#else
+#define __use_avx2
+#endif
+
+__use_avx2
 static __rte_always_inline rte_iova_t
 __desc_idx_to_iova(struct idxd_dmadev *idxd, uint16_t n)
 {
 	return idxd->desc_iova + (n * sizeof(struct idxd_hw_desc));
 }
 
+__use_avx2
 static __rte_always_inline void
 __idxd_movdir64b(volatile void *dst, const struct idxd_hw_desc *src)
 {
@@ -28,6 +39,7 @@ __idxd_movdir64b(volatile void *dst, const struct idxd_hw_desc *src)
 			: "memory");
 }
 
+__use_avx2
 static __rte_always_inline void
 __submit(struct idxd_dmadev *idxd)
 {
@@ -74,6 +86,7 @@ __submit(struct idxd_dmadev *idxd)
 			_mm256_setzero_si256());
 }
 
+__use_avx2
 static __rte_always_inline int
 __idxd_write_desc(struct idxd_dmadev *idxd,
 		const uint32_t op_flags,
@@ -112,6 +125,7 @@ __idxd_write_desc(struct idxd_dmadev *idxd,
 	return job_id;
 }
 
+__use_avx2
 int
 idxd_enqueue_copy(void *dev_private, uint16_t qid __rte_unused, rte_iova_t src,
 		rte_iova_t dst, unsigned int length, uint64_t flags)
@@ -126,6 +140,7 @@ idxd_enqueue_copy(void *dev_private, uint16_t qid __rte_unused, rte_iova_t src,
 			flags);
 }
 
+__use_avx2
 int
 idxd_enqueue_fill(void *dev_private, uint16_t qid __rte_unused, uint64_t pattern,
 		rte_iova_t dst, unsigned int length, uint64_t flags)
@@ -136,6 +151,7 @@ idxd_enqueue_fill(void *dev_private, uint16_t qid __rte_unused, uint64_t pattern
 			flags);
 }
 
+__use_avx2
 int
 idxd_submit(void *dev_private, uint16_t qid __rte_unused)
 {
@@ -143,6 +159,7 @@ idxd_submit(void *dev_private, uint16_t qid __rte_unused)
 	return 0;
 }
 
+__use_avx2
 static enum rte_dma_status_code
 get_comp_status(struct idxd_completion *c)
 {
@@ -163,6 +180,7 @@ get_comp_status(struct idxd_completion *c)
 	}
 }
 
+__use_avx2
 int
 idxd_vchan_status(const struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
 		enum rte_dma_vchan_status *status)
@@ -180,6 +198,7 @@ idxd_vchan_status(const struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
 	return 0;
 }
 
+__use_avx2
 static __rte_always_inline int
 batch_ok(struct idxd_dmadev *idxd, uint16_t max_ops, enum rte_dma_status_code *status)
 {
@@ -224,6 +243,7 @@ batch_ok(struct idxd_dmadev *idxd, uint16_t max_ops, enum rte_dma_status_code *s
 	return -1; /* error case */
 }
 
+__use_avx2
 static inline uint16_t
 batch_completed(struct idxd_dmadev *idxd, uint16_t max_ops, bool *has_error)
 {
@@ -275,6 +295,7 @@ batch_completed(struct idxd_dmadev *idxd, uint16_t max_ops, bool *has_error)
 	return ret;
 }
 
+__use_avx2
 static uint16_t
 batch_completed_status(struct idxd_dmadev *idxd, uint16_t max_ops, enum rte_dma_status_code *status)
 {
@@ -366,6 +387,7 @@ batch_completed_status(struct idxd_dmadev *idxd, uint16_t max_ops, enum rte_dma_
 	return ret;
 }
 
+__use_avx2
 uint16_t
 idxd_completed(void *dev_private, uint16_t qid __rte_unused, uint16_t max_ops,
 		uint16_t *last_idx, bool *has_error)
@@ -383,6 +405,7 @@ idxd_completed(void *dev_private, uint16_t qid __rte_unused, uint16_t max_ops,
 	return ret;
 }
 
+__use_avx2
 uint16_t
 idxd_completed_status(void *dev_private, uint16_t qid __rte_unused, uint16_t max_ops,
 		uint16_t *last_idx, enum rte_dma_status_code *status)
diff --git a/drivers/dma/idxd/meson.build b/drivers/dma/idxd/meson.build
index f1396be945..dcc0a297d7 100644
--- a/drivers/dma/idxd/meson.build
+++ b/drivers/dma/idxd/meson.build
@@ -5,7 +5,6 @@ build = dpdk_conf.has('RTE_ARCH_X86')
 reason = 'only supported on x86'
 
 deps += ['bus_pci']
-cflags += '-mavx2' # all platforms with idxd HW support AVX
 sources = files(
         'idxd_common.c',
         'idxd_pci.c',
-- 
2.32.0