diff options
Diffstat (limited to 'doc/book-enea-nfv-access-guide/doc/hypervisor_virtualization.xml')
-rw-r--r-- | doc/book-enea-nfv-access-guide/doc/hypervisor_virtualization.xml | 774 |
1 files changed, 0 insertions, 774 deletions
diff --git a/doc/book-enea-nfv-access-guide/doc/hypervisor_virtualization.xml b/doc/book-enea-nfv-access-guide/doc/hypervisor_virtualization.xml deleted file mode 100644 index 3f14719..0000000 --- a/doc/book-enea-nfv-access-guide/doc/hypervisor_virtualization.xml +++ /dev/null | |||
@@ -1,774 +0,0 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd"> | ||
4 | <chapter id="hypervisor_virt"> | ||
5 | <title>Hypervisor Virtualization</title> | ||
6 | |||
7 | <para>The Kernel-based Virtual Machine (KVM) is a virtualization | ||
8 | infrastructure for the Linux kernel, which turns it into a hypervisor. KVM | ||
9 | requires a processor with a hardware virtualization extension.</para> | ||
10 | |||
11 | <para>KVM uses QEMU, an open source machine emulator and virtualizer, to | ||
12 | virtualize a complete system. With KVM it is possible to run multiple guests | ||
13 | of a variety of operating systems, each with a complete set of virtualized | ||
14 | hardware.</para> | ||
15 | |||
16 | <section id="launch_virt_machine"> | ||
17 | <title>Launching a Virtual Machine</title> | ||
18 | |||
19 | <para>QEMU can make use of KVM when running a target architecture that is | ||
20 | the same as the host architecture. For instance, when running | ||
21 | <filename>qemu-system-aarch64</filename> on an <literal>aarch64</literal> | ||
22 | compatible processor with Hardware Virtualization support enabled, you can | ||
23 | take advantage of the KVM acceleration, an added benefit for your host and | ||
24 | guest system.</para> | ||
25 | |||
26 | <para>Enea NFV Access includes an optimized version of QEMU with KVM-only | ||
27 | support. To use KVM pass <command>--enable-kvm</command> to QEMU.</para> | ||
28 | |||
29 | <para>The following is an example of starting a guest system:</para> | ||
30 | |||
31 | <programlisting>taskset -c 0,1 qemu-system-aarch64 \ | ||
32 | -cpu host -machine virt,gic_version=3 -smp cores=2,sockets=1 \ | ||
33 | -vcpu 0,affinity=0 -vcpu 1,affinity=1 \ | ||
34 | -enable-kvm -nographic \ | ||
35 | -kernel Image \ | ||
36 | -drive file=enea-nfv-access-guest-qemuarm64.ext4,if=virtio,format=raw \ | ||
37 | -append 'root=/dev/vda console=ttyAMA0,115200' \ | ||
38 | -m 4096 \ | ||
39 | -object memory-backend-file,id=mem,size=4096M,mem-path=/dev/hugepages,share=on \ | ||
40 | -numa node,memdev=mem -mem-prealloc</programlisting> | ||
41 | </section> | ||
42 | |||
43 | <section id="qemu_boot"> | ||
44 | <title>Primary QEMU boot options</title> | ||
45 | |||
46 | <para>Below are detailed all the pertinent boot options for the QEMU | ||
47 | emulator:</para> | ||
48 | |||
49 | <itemizedlist> | ||
50 | <listitem> | ||
51 | <para>SMP - at least 2 cores should be enabled in order to isolate the | ||
52 | application(s) running in the virtual machine(s), on specific cores, | ||
53 | for better performance:</para> | ||
54 | |||
55 | <programlisting>-smp cores=2,threads=1,sockets=1 \</programlisting> | ||
56 | </listitem> | ||
57 | |||
58 | <listitem> | ||
59 | <para>CPU affinity - associate virtual CPUs with physical CPUs, and | ||
60 | optionally assign a default realtime priority to the virtual CPU | ||
61 | process in the host kernel. This option allows you to start QEMU vCPUs | ||
62 | on isolated physical CPUs:</para> | ||
63 | |||
64 | <programlisting>-vcpu 0,affinity=0 \</programlisting> | ||
65 | </listitem> | ||
66 | |||
67 | <listitem> | ||
68 | <para>Hugepages - KVM guests can be deployed with hugepage memory | ||
69 | support to reduce memory consumption and improve performance, by | ||
70 | reducing CPU cache usage. By using hugepages for a KVM guest, less | ||
71 | memory is used for page tables and TLB (Translation Lookaside Buffer) | ||
72 | misses are reduced, significantly increasing performance, especially | ||
73 | for memory-intensive situations.</para> | ||
74 | |||
75 | <programlisting>-object memory-backend-file,id=mem,size=4096M,mem-path=/dev/hugepages,share=on \</programlisting> | ||
76 | </listitem> | ||
77 | |||
78 | <listitem> | ||
79 | <para>Memory preallocation - preallocate hugepages at startup to | ||
80 | improve performance. This may affect QEMU boot time.</para> | ||
81 | |||
82 | <programlisting>-mem-prealloc \</programlisting> | ||
83 | </listitem> | ||
84 | |||
85 | <listitem> | ||
86 | <para>Enable realtime characteristics - run QEMU with realtime | ||
87 | features.</para> | ||
88 | |||
89 | <para>In this case, "realtime" is just an identifier for options that | ||
90 | are partially realtime. If you're running in a realtime or low latency | ||
91 | environment, and you don't want your pages to be swapped out, this can | ||
92 | be ensured by using <command>mlock=on</command>. If you want VM | ||
93 | density, then you may want swappable VMs, this can be done with | ||
94 | <command>mlock=off</command>.</para> | ||
95 | |||
96 | <programlisting>-realtime mlock=on \</programlisting> | ||
97 | </listitem> | ||
98 | </itemizedlist> | ||
99 | |||
100 | <para>If the hardware does not have an IOMMU, it will not be possible to | ||
101 | assign devices in KVM.</para> | ||
102 | </section> | ||
103 | |||
104 | <section id="net_in_guest"> | ||
105 | <title>Networking in guest</title> | ||
106 | |||
107 | <section id="vhost-user-support"> | ||
108 | <title>Using vhost-user support</title> | ||
109 | |||
110 | <para>The goal of <literal>vhost-user</literal> is to implement a Virtio | ||
111 | transport, staying as close as possible to the <literal>vhost</literal> | ||
112 | paradigm of using: shared memory, <literal>ioeventfds</literal> and | ||
113 | <literal>irqfds</literal>. A UNIX domain socket based mechanism allows | ||
114 | for the setup of resources used by various <literal>Vrings</literal> | ||
115 | shared between two userspace processes, and will be placed in shared | ||
116 | memory.</para> | ||
117 | |||
118 | <para>To run QEMU with the <literal>vhost-user</literal> backend, you | ||
119 | have to provide the named UNIX domain socket, which needs to already be | ||
120 | opened by the backend:</para> | ||
121 | |||
122 | <programlisting>-object memory-backend-file,id=mem,size=4096M,mem-path=/dev/hugepages,share=on \ | ||
123 | -chardev socket,id=char0,path=/var/run/openvswitch/vhost-user1 \ | ||
124 | -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce \ | ||
125 | -device virtio-net-pci,netdev=mynet1,mac=52:54:00:00:00:01 \</programlisting> | ||
126 | |||
127 | <para>The vHost User standard uses a client-server model. The server | ||
128 | creates and manages the vHost User sockets and the client connects to | ||
129 | the sockets created by the server. It is recommended to use QEMU as the | ||
130 | server, so that the <literal>vhost-user</literal> client can be | ||
131 | restarted without affecting the server, otherwise if the server side | ||
132 | dies, all clients need to be restarted.</para> | ||
133 | |||
134 | <para>Using <literal>vhost-user</literal> in QEMU will offer the | ||
135 | flexibility to stop and start the virtual machine with no impact on the | ||
136 | virtual switch from the host | ||
137 | (<literal>vhost-user-client</literal>).</para> | ||
138 | |||
139 | <programlisting>-chardev socket,id=char0,path=/var/run/openvswitch/vhost-user1,server \</programlisting> | ||
140 | </section> | ||
141 | |||
142 | <section id="tap-interface"> | ||
143 | <title>Using TAP Interfaces</title> | ||
144 | |||
145 | <para>QEMU can use TAP interfaces to provide full networking capability | ||
146 | for the guest OS:</para> | ||
147 | |||
148 | <programlisting>-netdev tap,id=net0,ifname=tap0,script=no,downscript=no \ | ||
149 | -device virtio-net-pci,netdev=net0,mac=22:EA:FB:A8:25:AE \</programlisting> | ||
150 | </section> | ||
151 | |||
152 | <section id="vfio-passthrough"> | ||
153 | <title>VFIO passthrough VF (SR-IOV) to guest</title> | ||
154 | |||
155 | <para>The KVM hypervisor supports attaching PCI devices on the host | ||
156 | system to guests. PCI passthrough allows guests to have exclusive access | ||
157 | to PCI devices for a range of tasks. PCI passthrough allows PCI devices | ||
158 | to appear and behave as if they were physically attached to the guest | ||
159 | operating system.</para> | ||
160 | |||
161 | <para>Create guest with direct passthrough via VFIO framework like | ||
162 | so:</para> | ||
163 | |||
164 | <programlisting>-device vfio-pci,host=0001:01:00.1 \</programlisting> | ||
165 | |||
166 | <para>On the host, there are Virtual Functions for each physical | ||
167 | Ethernet port. The primary VF has to be used for a guest network to | ||
168 | access, before starting QEMU:</para> | ||
169 | |||
170 | <programlisting>$ modprobe vfio_pci | ||
171 | $ dpdk-devbind.py --bind=vfio-pci 0001:01:00.1</programlisting> | ||
172 | |||
173 | <para>For more details related to Virtual Functions, see: <ulink | ||
174 | url="http://dpdk.org/doc/guides/nics/thunderx.html">http://dpdk.org/doc/guides/nics/thunderx.html</ulink>.</para> | ||
175 | </section> | ||
176 | |||
177 | <section> | ||
178 | <title>Enable VFIO-NOIOMMU mode</title> | ||
179 | |||
180 | <para>In order to run a DPDK application in VM, the VFIO-NOIOMMU needs | ||
181 | to be set: <programlisting>echo 1 > /sys/module/vfio/parameters/enable_unsafe_noiommu_mode</programlisting></para> | ||
182 | </section> | ||
183 | |||
184 | <section id="multiqueue"> | ||
185 | <title>Multi-queue</title> | ||
186 | |||
187 | <para>In order to ensure that network performance scales as the number | ||
188 | of vCPUs increases, multi-queue support can be used in QEMU.</para> | ||
189 | |||
190 | <section id="qemu-multiqueue-support"> | ||
191 | <title>QEMU multi-queue support configuration</title> | ||
192 | |||
193 | <para>Below is an example of how to set up the QEMU multi-queue | ||
194 | support configuration:</para> | ||
195 | |||
196 | <programlisting>-chardev socket,id=char0,path=/var/run/openvswitch/vhost-user1 \ | ||
197 | -netdev type=vhost-user,id=net0,chardev=char0,queues=2 \ | ||
198 | -device virtio-net-pci,netdev=net0,mac=22:EA:FB:A8:25:AE,mq=on,vectors=6 | ||
199 | where vectors is calculated as: 2 + 2 * number of queues.</programlisting> | ||
200 | </section> | ||
201 | |||
202 | <section id="inside-guest"> | ||
203 | <title>Inside guest</title> | ||
204 | |||
205 | <para>The Linux kernel <filename>virtio-net</filename> driver, where | ||
206 | one queue is enabled by default:</para> | ||
207 | |||
208 | <programlisting>$ ethtool -L eth0 combined 2 | ||
209 | DPDK Virtio PMD | ||
210 | $ testpmd -c 0x7 -- -i --rxq=2 --txq=2 --nb-cores=2 ...</programlisting> | ||
211 | |||
212 | <para>For QEMU documentation, see: <ulink | ||
213 | url="https://qemu.weilnetz.de/doc/qemu-doc.html">QEMU User | ||
214 | Documentation</ulink>.</para> | ||
215 | </section> | ||
216 | </section> | ||
217 | </section> | ||
218 | |||
219 | <section id="libvirt"> | ||
220 | <title>Libvirt</title> | ||
221 | |||
222 | <para>One way to manage guests in Enea NFV Access is by using | ||
223 | <literal>libvirt</literal>. Libvirt is used in conjunction with a daemon | ||
224 | (<literal>libvirtd</literal>) and a command line utility | ||
225 | (<literal>virsh</literal>) to manage virtualized environments.</para> | ||
226 | |||
227 | <para>The <literal>libvirt</literal> library is a hypervisor-independent | ||
228 | virtualization API and toolkit that is able to interact with the | ||
229 | virtualization capabilities of a range of operating systems. | ||
230 | <literal>Libvirt</literal> provides a common, generic and stable layer to | ||
231 | securely manage domains on a node. As nodes may be remotely located, it | ||
232 | provides all methods required to provision, create, modify, monitor, | ||
233 | control, migrate and stop the domains, within the limits of hypervisor | ||
234 | support for these operations.</para> | ||
235 | |||
236 | <para>The <literal>libvirt</literal> daemon runs on the Enea NFV Access | ||
237 | host. All tools built upon the libvirt API, connect to the daemon to | ||
238 | request the desired operation, and to collect information about the | ||
239 | configuration and resources of the host system and guests. | ||
240 | <literal>virsh</literal> is a command line interface tool for managing | ||
241 | guests and the hypervisor. The <literal>virsh</literal> tool is built upon | ||
242 | the libvirt management API.</para> | ||
243 | |||
244 | <para><emphasis role="bold">Major functionality provided by | ||
245 | libvirt</emphasis></para> | ||
246 | |||
247 | <para>The following is a summary from the libvirt <ulink | ||
248 | url="http://wiki.libvirt.org/page/FAQ#What_is_libvirt.3F">home | ||
249 | page</ulink> describing the major libvirt features:</para> | ||
250 | |||
251 | <itemizedlist> | ||
252 | <listitem> | ||
253 | <para><emphasis role="bold">VM management:</emphasis> Various domain | ||
254 | lifecycle operations such as start, stop, pause, save, restore, and | ||
255 | migrate. Hotplug operations for many device types including disk and | ||
256 | network interfaces, memory, and CPUs.</para> | ||
257 | </listitem> | ||
258 | |||
259 | <listitem> | ||
260 | <para><emphasis role="bold">Remote machine support:</emphasis> All | ||
261 | libvirt functionality is accessible on any machine running the libvirt | ||
262 | daemon, including remote machines. A variety of network transports are | ||
263 | supported for connecting remotely, with the simplest being | ||
264 | <literal>SSH</literal>, which requires no extra explicit | ||
265 | configuration. For more information, see: <ulink | ||
266 | url="http://libvirt.org/remote.html">http://libvirt.org/remote.html</ulink>.</para> | ||
267 | </listitem> | ||
268 | |||
269 | <listitem> | ||
270 | <para><emphasis role="bold">Network interface management:</emphasis> | ||
271 | Any host running the libvirt daemon can be used to manage physical and | ||
272 | logical network interfaces. Enumerate existing interfaces, as well as | ||
273 | configure (and create) interfaces, bridges, vlans, and bond devices. | ||
274 | For more details see: <ulink | ||
275 | url="https://fedorahosted.org/netcf/">https://fedorahosted.org/netcf/</ulink>.</para> | ||
276 | </listitem> | ||
277 | |||
278 | <listitem> | ||
279 | <para><emphasis role="bold">Virtual NAT and Route based | ||
280 | networking:</emphasis> Any host running the libvirt daemon can manage | ||
281 | and create virtual networks. Libvirt virtual networks use firewall | ||
282 | rules to act as a router, providing VMs transparent access to the host | ||
283 | machines network. For more information, see: <ulink | ||
284 | url="http://libvirt.org/archnetwork.html">http://libvirt.org/archnetwork.html</ulink>.</para> | ||
285 | </listitem> | ||
286 | |||
287 | <listitem> | ||
288 | <para><emphasis role="bold">Storage management:</emphasis> Any host | ||
289 | running the libvirt daemon can be used to manage various types of | ||
290 | storage: create file images of various formats (raw, qcow2, etc.), | ||
291 | mount NFS shares, enumerate existing LVM volume groups, create new LVM | ||
292 | volume groups and logical volumes, partition raw disk devices, mount | ||
293 | iSCSI shares, and much more. For more details, see: <ulink | ||
294 | url="http://libvirt.org/storage.html">http://libvirt.org/storage.html</ulink>.</para> | ||
295 | </listitem> | ||
296 | |||
297 | <listitem> | ||
298 | <para><emphasis role="bold">Libvirt Configuration:</emphasis> A | ||
299 | properly running libvirt requires that the following elements be in | ||
300 | place:</para> | ||
301 | |||
302 | <itemizedlist> | ||
303 | <listitem> | ||
304 | <para>Configuration files, located in the directory | ||
305 | <literal>/etc/libvirt</literal>. They include the daemon's | ||
306 | configuration file <literal>libvirtd.conf</literal>, and | ||
307 | hypervisor-specific configuration files, like | ||
308 | <literal>qemu.conf</literal> for the QEMU.</para> | ||
309 | </listitem> | ||
310 | |||
311 | <listitem> | ||
312 | <para>A running libvirtd daemon. The daemon is started | ||
313 | automatically on the Enea NFV Access host.</para> | ||
314 | </listitem> | ||
315 | |||
316 | <listitem> | ||
317 | <para>Configuration files for the libvirt domains, or guests, to | ||
318 | be managed by the KVM host. The specifics for guest domains shall | ||
319 | be defined in an XML file of a format specified at <ulink | ||
320 | url="http://libvirt.org/formatdomain.html">http://libvirt.org/formatdomain.html</ulink>. | ||
321 | XML formats for other structures are specified at <ulink type="" | ||
322 | url="http://libvirt.org/format.html">http://libvirt.org/format.html</ulink>.</para> | ||
323 | </listitem> | ||
324 | </itemizedlist> | ||
325 | </listitem> | ||
326 | </itemizedlist> | ||
327 | |||
328 | <section id="boot-kvm-guest"> | ||
329 | <title>Booting a KVM Guest</title> | ||
330 | |||
331 | <para>There are several ways to boot a KVM guest. Here we describe how | ||
332 | to boot using a raw image. A direct kernel boot can be performed by | ||
333 | transferring the guest kernel and the file system files to the host and | ||
334 | specifying a <literal><kernel></literal> and an | ||
335 | <literal><initrd></literal> element inside the | ||
336 | <literal><os></literal> element of the guest XML file, as in the | ||
337 | following example:</para> | ||
338 | |||
339 | <programlisting><os> | ||
340 | <kernel>Image</kernel> | ||
341 | </os> | ||
342 | <devices> | ||
343 | <disk type='file' device='disk'> | ||
344 | <driver name='qemu' type='raw' cache='none'/> | ||
345 | <source file='enea-nfv-access-guest-qemuarm64.ext4'/> | ||
346 | <target dev='vda' bus='virtio'/> | ||
347 | </disk> | ||
348 | </devices></programlisting> | ||
349 | </section> | ||
350 | |||
351 | <section id="start-guest"> | ||
352 | <title>Starting a Guest</title> | ||
353 | |||
354 | <para>Command <command>virsh create</command> starts a guest:</para> | ||
355 | |||
356 | <programlisting>virsh create example-guest-arm.xml</programlisting> | ||
357 | |||
358 | <para>If further configurations are needed before the guest is reachable | ||
359 | through <literal>ssh</literal>, a console can be started using command | ||
360 | <command>virsh console</command>. The example below shows how to start a | ||
361 | console where kvm-example-guest is the name of the guest defined in the | ||
362 | guest XML file:</para> | ||
363 | |||
364 | <programlisting>virsh console kvm-example-guest</programlisting> | ||
365 | |||
366 | <para>This requires that the guest domain has a console configured in | ||
367 | the guest XML file:</para> | ||
368 | |||
369 | <programlisting><os> | ||
370 | <cmdline>console=ttyAMA0,115200</cmdline> | ||
371 | </os> | ||
372 | <devices> | ||
373 | <console type='pty'> | ||
374 | <target type='serial' port='0'/> | ||
375 | </console> | ||
376 | </devices></programlisting> | ||
377 | </section> | ||
378 | |||
379 | <section id="isolation"> | ||
380 | <title>Isolation</title> | ||
381 | |||
382 | <para>It may be desirable to isolate execution in a guest, to a specific | ||
383 | guest core. It might also be desirable to run a guest on a specific host | ||
384 | core.</para> | ||
385 | |||
386 | <para>To pin the virtual CPUs of the guest to specific cores, configure | ||
387 | the <literal><cputune></literal> contents as follows:</para> | ||
388 | |||
389 | <orderedlist> | ||
390 | <listitem> | ||
391 | <para>First explicitly state on which host core each guest core | ||
392 | shall run, by mapping <literal>vcpu</literal> to | ||
393 | <literal>cpuset</literal> in the <literal><vcpupin></literal> | ||
394 | tag.</para> | ||
395 | </listitem> | ||
396 | |||
397 | <listitem> | ||
398 | <para>In the <literal><cputune></literal> tag it is further | ||
399 | possible to specify on which CPU the emulator shall run by adding | ||
400 | the <literal>cpuset</literal> to the | ||
401 | <literal><emulatorpin></literal> tag.</para> | ||
402 | |||
403 | <programlisting><vcpu placement='static'>2</vcpu> | ||
404 | <cputune> | ||
405 | <vcpupin vcpu='0' cpuset='2'/> | ||
406 | <vcpupin vcpu='1' cpuset='3'/> | ||
407 | <emulatorpin cpuset="2"/> | ||
408 | </cputune></programlisting> | ||
409 | |||
410 | <para><literal>libvirt</literal> will group all threads belonging to | ||
411 | a QEMU instance into cgroups that will be created for that purpose. | ||
412 | It is possible to supply a base name for those cgroups using the | ||
413 | <literal><resource></literal> tag:</para> | ||
414 | |||
415 | <programlisting><resource> | ||
416 | <partition>/rt</partition> | ||
417 | </resource></programlisting> | ||
418 | </listitem> | ||
419 | </orderedlist> | ||
420 | </section> | ||
421 | |||
422 | <section id="network-libvirt"> | ||
423 | <title>Networking using libvirt</title> | ||
424 | |||
425 | <para>Command <command>virsh net-create</command> starts a network. If | ||
426 | any networks are listed in the guest XML file, those networks must be | ||
427 | started before the guest is started. As an example, if the network is | ||
428 | defined in a file named <filename>example-net.xml</filename>, it will be | ||
429 | started as such:</para> | ||
430 | |||
431 | <programlisting>virsh net-create example-net.xml | ||
432 | <network> | ||
433 | <name>sriov</name> | ||
434 | <forward mode='hostdev' managed='yes'> | ||
435 | <pf dev='eno3'/> | ||
436 | </forward> | ||
437 | </network></programlisting> | ||
438 | |||
439 | <para><literal>libvirt</literal> is a virtualization API that supports | ||
440 | virtual network creation. These networks can be connected to guests and | ||
441 | containers by referencing the network in the guest XML file. It is | ||
442 | possible to have a virtual network persistently running on the host by | ||
443 | starting the network with command <command>virsh net-define</command> | ||
444 | instead of the previously mentioned <command>virsh | ||
445 | net-create</command>.</para> | ||
446 | |||
447 | <para>An example for the sample network defined in | ||
448 | <literal>meta-vt/recipes-example/virt-example/files/example-net.xml</literal>:</para> | ||
449 | |||
450 | <programlisting>virsh net-define example-net.xml</programlisting> | ||
451 | |||
452 | <para>Command <command>virsh net-autostart</command> enables a | ||
453 | persistent network to start automatically when the libvirt daemon | ||
454 | starts:</para> | ||
455 | |||
456 | <programlisting>virsh net-autostart example-net</programlisting> | ||
457 | |||
458 | <para>The guest configuration file (xml) must be updated to access the | ||
459 | newly created network like so:</para> | ||
460 | |||
461 | <programlisting> <interface type='network'> | ||
462 | <source network='sriov'/> | ||
463 | </interface></programlisting> | ||
464 | |||
465 | <para>The following are a few ways of network access from a guest while | ||
466 | using <command>virsh</command>:</para> | ||
467 | |||
468 | <itemizedlist> | ||
469 | <listitem> | ||
470 | <para><emphasis role="bold">vhost-user interface</emphasis></para> | ||
471 | |||
472 | <para>See the Open vSwitch chapter on how to create a | ||
473 | <literal>vhost-user</literal> interface using Open vSwitch(OVS). | ||
474 | Currently there is no Open vSwitch(OVS) support for networks that | ||
475 | are managed by <literal>libvirt </literal>(e.g. NAT). Until further | ||
476 | notice, only bridged networks are supported (those where the user | ||
477 | has to manually create the bridge). In order to pass Open vSwitch | ||
478 | configuration the <qemu:commandline> is used.</para> | ||
479 | |||
480 | <programlisting> <qemu:commandline> | ||
481 | <qemu:arg value='-chardev'/> | ||
482 | <qemu:arg value='socket,id=charnet0,path=/var/run/openvswitch/vhost-user1'/> | ||
483 | <qemu:arg value='-netdev'/> | ||
484 | <qemu:arg value='type=vhost-user,id=hostnet0,chardev=charnet0'/> | ||
485 | <qemu:arg value='-device'/> | ||
486 | <qemu:arg value='virtio-net-pci,netdev=hostnet0,id=net0,mac=00:00:00:00:00:01, | ||
487 | bus=pcie.0,addr=0x2'/> | ||
488 | </qemu:commandline></programlisting> | ||
489 | </listitem> | ||
490 | |||
491 | <listitem> | ||
492 | <para><emphasis role="bold">PCI passthrough | ||
493 | (SR-IOV)</emphasis></para> | ||
494 | |||
495 | <para>The KVM hypervisor supports attaching PCI devices on the host | ||
496 | system to guests. PCI passthrough allows guests to have exclusive | ||
497 | access to PCI devices for a range of tasks. PCI passthrough allows | ||
498 | PCI devices to appear and behave as if they were physically attached | ||
499 | to the guest operating system.</para> | ||
500 | |||
501 | <para>Preparing a system for PCI passthrough is done like so:</para> | ||
502 | |||
503 | <itemizedlist> | ||
504 | <listitem> | ||
505 | <para>Change the owner of the | ||
506 | <literal>/dev/vfio/<group></literal> to QEMU and edit | ||
507 | <literal>/etc/libvirt/qemu.conf</literal> to explicitly allow | ||
508 | permission to it:</para> | ||
509 | |||
510 | <para><programlisting>$ chown qemu:qemu /dev/vfio/66 | ||
511 | $ ls -l /dev/vfio/66 | ||
512 | crw------- 1 qemu qemu 244, 0 Dec 4 08:24 /dev/vfio/66 | ||
513 | |||
514 | $ cat /etc/libvirt/qemu.conf | ||
515 | ... | ||
516 | cgroup_device_acl = [ | ||
517 | "/dev/null", "/dev/full", "/dev/zero", | ||
518 | "/dev/random", "/dev/urandom", | ||
519 | "/dev/ptmx", "/dev/kvm", "/dev/kqemu", | ||
520 | "/dev/rtc","/dev/hpet", "/dev/vfio/vfio", | ||
521 | <command>"/dev/vfio/66"</command>, | ||
522 | ] | ||
523 | ...</programlisting></para> | ||
524 | </listitem> | ||
525 | |||
526 | <listitem> | ||
527 | <para>Increase the locked memory limits within the libvirtd | ||
528 | service file:</para> | ||
529 | |||
530 | <para><programlisting>$ cat /lib/systemd/system/libvirtd.service | ||
531 | ... | ||
532 | [Service] | ||
533 | Type=forking | ||
534 | PIDFile=/var/run/libvirtd.pid | ||
535 | EnvironmentFile=-/etc/sysconfig/libvirtd | ||
536 | <command>LimitMEMLOCK=infinity</command> | ||
537 | ExecStart=/usr/sbin/libvirtd $LIBVIRTD_ARGS | ||
538 | ExecReload=/bin/kill -HUP $MAINPID | ||
539 | KillMode=process | ||
540 | Restart=on-failure | ||
541 | # Override the maximum number of opened files | ||
542 | #LimitNOFILE=2048 | ||
543 | ...</programlisting></para> | ||
544 | </listitem> | ||
545 | |||
546 | <listitem> | ||
547 | <para>VFs must be created on the host before starting the | ||
548 | guest:</para> | ||
549 | |||
550 | <programlisting>$ modprobe vfio_pci | ||
551 | $ dpdk-devbind.py --bind=vfio-pci 0001:01:00.1 | ||
552 | <qemu:commandline> | ||
553 | <qemu:arg value='-device'/> | ||
554 | <qemu:arg value='vfio-pci,host=0001:01:00.1'/> | ||
555 | </qemu:commandline></programlisting> | ||
556 | </listitem> | ||
557 | </itemizedlist> | ||
558 | </listitem> | ||
559 | |||
560 | <listitem> | ||
561 | <para><emphasis role="bold">Bridge interface</emphasis></para> | ||
562 | |||
563 | <para>In case an OVS bridge exists on host, it can be used to | ||
564 | connect the guest:</para> | ||
565 | |||
566 | <programlisting> <interface type='bridge'> | ||
567 | <mac address='52:54:00:71:b1:b6'/> | ||
568 | <source bridge='ovsbr0'/> | ||
569 | <model type='rtl8139'/> | ||
570 | <virtualport type='openvswitch'/> | ||
571 | </interface></programlisting> | ||
572 | |||
573 | <para>For further details on the network XML format, see <ulink | ||
574 | url="http://libvirt.org/formatnetwork.html">http://libvirt.org/formatnetwork.html</ulink>.</para> | ||
575 | </listitem> | ||
576 | </itemizedlist> | ||
577 | </section> | ||
578 | |||
579 | <section id="libvirt-guest-config-ex"> | ||
580 | <title>Libvirt guest configuration examples</title> | ||
581 | |||
582 | <note> | ||
583 | <para>The currently used libvirt version has some limitations for aarch64, | ||
584 | which are bypassed using the qemu:commandline option.</para> | ||
585 | </note> | ||
586 | |||
587 | <example> | ||
588 | <title>Guest configuration with vhost-user interface</title> | ||
589 | |||
590 | <programlisting><domain type='kvm' xmlns:qemu='http://libvirt.org/schemas/domain/qemu/1.0'> | ||
591 | <name>vm_vhost</name> | ||
592 | <uuid>ed204646-1ad5-11e7-93ae-92361f002671</uuid> | ||
593 | <memory unit='KiB'>4194304</memory> | ||
594 | <currentMemory unit='KiB'>4194304</currentMemory> | ||
595 | <memoryBacking> | ||
596 | <hugepages> | ||
597 | <page size='512' unit='M' nodeset='0'/> | ||
598 | </hugepages> | ||
599 | </memoryBacking> | ||
600 | <os> | ||
601 | <type arch='aarch64' machine='virt,gic_version=3'>hvm</type> | ||
602 | <kernel>/mnt/qemu/Image</kernel> | ||
603 | <cmdline>root=/dev/vda console=ttyAMA0,115200n8</cmdline> | ||
604 | <boot dev='hd'/> | ||
605 | </os> | ||
606 | <features> | ||
607 | <acpi/> | ||
608 | <apic/> | ||
609 | </features> | ||
610 | <vcpu placement='static'>2</vcpu> | ||
611 | <cpu mode='host-model'> | ||
612 | <model fallback='allow'/> | ||
613 | <topology sockets='1' cores='2' threads='1'/> | ||
614 | <numa> | ||
615 | <cell id='0' cpus='0' memory='4194304' unit='KiB' memAccess='shared'/> | ||
616 | </numa> | ||
617 | </cpu> | ||
618 | <cputune> | ||
619 | <vcpupin vcpu="0" cpuset="4"/> | ||
620 | <vcpupin vcpu="1" cpuset="5"/> | ||
621 | </cputune> | ||
622 | <on_poweroff>destroy</on_poweroff> | ||
623 | <on_reboot>restart</on_reboot> | ||
624 | <on_crash>destroy</on_crash> | ||
625 | <devices> | ||
626 | <emulator>/usr/bin/qemu-system-aarch64</emulator> | ||
627 | <disk type='file' device='disk'> | ||
628 | <driver name='qemu' type='raw' cache='none'/> | ||
629 | <source file='/mnt/qemu/enea-nfv-access-guest-qemuarm64.ext4'/> | ||
630 | <target dev='vda' bus='virtio'/> | ||
631 | </disk> | ||
632 | <serial type='pty'> | ||
633 | <target port='0'/> | ||
634 | </serial> | ||
635 | <console type='pty'> | ||
636 | <target type='serial' port='0'/> | ||
637 | </console> | ||
638 | </devices> | ||
639 | <qemu:commandline> | ||
640 | <qemu:arg value='-chardev'/> | ||
641 | <qemu:arg value='socket,id=charnet0,path=/var/run/openvswitch/vhost-user1'/> | ||
642 | <qemu:arg value='-netdev'/> | ||
643 | <qemu:arg value='type=vhost-user,id=hostnet0,chardev=charnet0'/> | ||
644 | <qemu:arg value='-device'/> | ||
645 | <qemu:arg value='virtio-net-pci,netdev=hostnet0,id=net0,mac=00:00:00:00:00:01, | ||
646 | bus=pcie.0,addr=0x2'/> | ||
647 | </qemu:commandline> | ||
648 | </domain></programlisting> | ||
649 | </example> | ||
650 | |||
651 | <example> | ||
652 | <title>Guest configuration with PCI passthrough</title> | ||
653 | |||
654 | <programlisting><domain type='kvm' xmlns:qemu='http://libvirt.org/schemas/domain/qemu/1.0'> | ||
655 | <name>vm_sriov</name> | ||
656 | <uuid>ed204646-1ad5-11e7-93ae-92361f002671</uuid> | ||
657 | <memory unit='KiB'>4194304</memory> | ||
658 | <currentMemory unit='KiB'>4194304</currentMemory> | ||
659 | <memoryBacking> | ||
660 | <hugepages> | ||
661 | <page size='512' unit='M' nodeset='0'/> | ||
662 | </hugepages> | ||
663 | </memoryBacking> | ||
664 | <os> | ||
665 | <type arch='aarch64' machine='virt,gic_version=3'>hvm</type> | ||
666 | <kernel>/mnt/qemu/Image</kernel> | ||
667 | <cmdline>root=/dev/vda console=ttyAMA0,115200n8</cmdline> | ||
668 | <boot dev='hd'/> | ||
669 | </os> | ||
670 | <features> | ||
671 | <acpi/> | ||
672 | <apic/> | ||
673 | </features> | ||
674 | <vcpu placement='static'>2</vcpu> | ||
675 | <cpu mode='host-model'> | ||
676 | <model fallback='allow'/> | ||
677 | <topology sockets='1' cores='2' threads='1'/> | ||
678 | <numa> | ||
679 | <cell id='0' cpus='0' memory='4194304' unit='KiB' memAccess='shared'/> | ||
680 | </numa> | ||
681 | </cpu> | ||
682 | <cputune> | ||
683 | <vcpupin vcpu="0" cpuset="4"/> | ||
684 | <vcpupin vcpu="1" cpuset="5"/> | ||
685 | </cputune> | ||
686 | <on_poweroff>destroy</on_poweroff> | ||
687 | <on_reboot>restart</on_reboot> | ||
688 | <on_crash>destroy</on_crash> | ||
689 | <devices> | ||
690 | <emulator>/usr/bin/qemu-system-aarch64</emulator> | ||
691 | <disk type='file' device='disk'> | ||
692 | <driver name='qemu' type='raw' cache='none'/> | ||
693 | <source file='/mnt/qemu/enea-nfv-access-guest-qemuarm64.ext4'/> | ||
694 | <target dev='vda' bus='virtio'/> | ||
695 | </disk> | ||
696 | <serial type='pty'> | ||
697 | <target port='0'/> | ||
698 | </serial> | ||
699 | <console type='pty'> | ||
700 | <target type='serial' port='0'/> | ||
701 | </console> | ||
702 | </devices> | ||
703 | <qemu:commandline> | ||
704 | <qemu:arg value='-device'/> | ||
705 | <qemu:arg value='vfio-pci,host=0001:01:00.1'/> | ||
706 | </qemu:commandline> | ||
707 | </domain></programlisting> | ||
708 | </example> | ||
709 | |||
710 | <example> | ||
711 | <title>Guest configuration with bridge interface</title> | ||
712 | |||
713 | <programlisting><domain type='kvm'> | ||
714 | <name>vm_bridge</name> | ||
715 | <uuid>4a9b3f53-fa2a-47f3-a757-dd87720d9d1d</uuid> | ||
716 | <memory unit='KiB'>4194304</memory> | ||
717 | <currentMemory unit='KiB'>4194304</currentMemory> | ||
718 | <memoryBacking> | ||
719 | <hugepages> | ||
720 | <page size='1' unit='G' nodeset='0'/> | ||
721 | </hugepages> | ||
722 | </memoryBacking> | ||
723 | <vcpu placement='static'>2</vcpu> | ||
724 | <cputune> | ||
725 | <shares>4096</shares> | ||
726 | <vcpupin vcpu='0' cpuset='4'/> | ||
727 | <vcpupin vcpu='1' cpuset='5'/> | ||
728 | <emulatorpin cpuset='4,5'/> | ||
729 | </cputune> | ||
730 | <os> | ||
731 | <type arch='x86_64' machine='q35'>hvm</type> | ||
732 | <kernel>/mnt/qemu/bzImage</kernel> | ||
733 | <cmdline>root=/dev/vda console=ttyS0,115200</cmdline> | ||
734 | <boot dev='hd'/> | ||
735 | </os> | ||
736 | <features> | ||
737 | <acpi/> | ||
738 | <apic/> | ||
739 | </features> | ||
740 | <cpu mode='host-model'> | ||
741 | <model fallback='allow'/> | ||
742 | <topology sockets='2' cores='1' threads='1'/> | ||
743 | <numa> | ||
744 | <cell id='0' cpus='0-1' memory='4194304' unit='KiB' memAccess='shared'/> | ||
745 | </numa> | ||
746 | </cpu> | ||
747 | <on_poweroff>destroy</on_poweroff> | ||
748 | <on_reboot>restart</on_reboot> | ||
749 | <on_crash>destroy</on_crash> | ||
750 | <devices> | ||
751 | <emulator>/usr/bin/qemu-system-x86_64</emulator> | ||
752 | <disk type='file' device='disk'> | ||
753 | <driver name='qemu' type='raw' cache='none'/> | ||
754 | <source file='/mnt/qemu/enea-nfv-access-guest-qemux86-64.ext4'/> | ||
755 | <target dev='vda' bus='virtio'/> | ||
756 | </disk> | ||
757 | <interface type='bridge'> | ||
758 | <mac address='52:54:00:71:b1:b6'/> | ||
759 | <source bridge='ovsbr0'/> | ||
760 | <model type='rtl8139'/> | ||
761 | <virtualport type='openvswitch'/> | ||
762 | </interface> | ||
763 | <serial type='pty'> | ||
764 | <target port='0'/> | ||
765 | </serial> | ||
766 | <console type='pty'> | ||
767 | <target type='serial' port='0'/> | ||
768 | </console> | ||
769 | </devices> | ||
770 | </domain></programlisting> | ||
771 | </example> | ||
772 | </section> | ||
773 | </section> | ||
774 | </chapter> \ No newline at end of file | ||