diff options
Diffstat (limited to 'doc/book-enea-nfv-access-reference-guide-intel/doc/hypervisor_virtualization.xml')
-rw-r--r-- | doc/book-enea-nfv-access-reference-guide-intel/doc/hypervisor_virtualization.xml | 744 |
1 files changed, 0 insertions, 744 deletions
diff --git a/doc/book-enea-nfv-access-reference-guide-intel/doc/hypervisor_virtualization.xml b/doc/book-enea-nfv-access-reference-guide-intel/doc/hypervisor_virtualization.xml deleted file mode 100644 index cf712b4..0000000 --- a/doc/book-enea-nfv-access-reference-guide-intel/doc/hypervisor_virtualization.xml +++ /dev/null | |||
@@ -1,744 +0,0 @@ | |||
1 | <?xml version="1.0" encoding="ISO-8859-1"?> | ||
2 | <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd"> | ||
4 | <chapter id="hypervisor_virt"> | ||
5 | <title>Hypervisor Virtualization</title> | ||
6 | |||
7 | <para>The KVM, Kernel-based Virtual Machine, is a virtualization | ||
8 | infrastructure for the Linux kernel which turns it into a hypervisor. KVM | ||
9 | requires a processor with a hardware virtualization extension.</para> | ||
10 | |||
11 | <para>KVM uses QEMU, an open source machine emulator and virtualizer, to | ||
12 | virtualize a complete system. With KVM it is possible to run multiple guests | ||
13 | of a variety of operating systems, each with a complete set of virtualized | ||
14 | hardware.</para> | ||
15 | |||
16 | <section id="launch_virt_machine"> | ||
17 | <title>Launching a Virtual Machine</title> | ||
18 | |||
19 | <para>QEMU can make use of KVM when running a target architecture that is | ||
20 | the same as the host architecture. For instance, when running | ||
21 | qemu-system-x86_64 on an x86-64 compatible processor (containing | ||
22 | virtualization extensions Intel VT or AMD-V), you can take advantage of | ||
23 | the KVM acceleration, giving you benefit for your host and your guest | ||
24 | system.</para> | ||
25 | |||
26 | <para>Enea NFV Access includes an optimized version of QEMU with KVM-only | ||
27 | support. To use KVM pass <command>--enable-kvm</command> to QEMU.</para> | ||
28 | |||
29 | <para>The following is an example of starting a guest:</para> | ||
30 | |||
31 | <programlisting>taskset -c 0,1 qemu-system-x86_64 \ | ||
32 | -cpu host -M q35 -smp cores=2,sockets=1 \ | ||
33 | -vcpu 0,affinity=0 -vcpu 1,affinity=1 \ | ||
34 | -enable-kvm -nographic \ | ||
35 | -kernel bzImage \ | ||
36 | -drive file=enea-nfv-access-guest-qemux86-64.ext4,if=virtio,format=raw \ | ||
37 | -append 'root=/dev/vda console=ttyS0,115200' \ | ||
38 | -m 4096 \ | ||
39 | -object memory-backend-file,id=mem,size=4096M,mem-path=/dev/hugepages,share=on \ | ||
40 | -numa node,memdev=mem -mem-prealloc</programlisting> | ||
41 | </section> | ||
42 | |||
43 | <section id="qemu_boot"> | ||
44 | <title>Main QEMU boot options</title> | ||
45 | |||
46 | <para>Below are detailed all the pertinent boot options for the QEMU | ||
47 | emulator:</para> | ||
48 | |||
49 | <itemizedlist> | ||
50 | <listitem> | ||
51 | <para>SMP - at least 2 cores should be enabled in order to isolate | ||
52 | application(s) running in virtual machine(s) on specific cores for | ||
53 | better performance.</para> | ||
54 | |||
55 | <programlisting>-smp cores=2,threads=1,sockets=1 \</programlisting> | ||
56 | </listitem> | ||
57 | |||
58 | <listitem> | ||
59 | <para>CPU affinity - associate virtual CPUs with physical CPUs and | ||
60 | optionally assign a default real time priority to the virtual CPU | ||
61 | process in the host kernel. This option allows you to start qemu vCPUs | ||
62 | on isolated physical CPUs.</para> | ||
63 | |||
64 | <programlisting>-vcpu 0,affinity=0 \</programlisting> | ||
65 | </listitem> | ||
66 | |||
67 | <listitem> | ||
68 | <para>Hugepages - KVM guests can be deployed with huge page memory | ||
69 | support in order to reduce memory consumption and improve performance, | ||
70 | by reducing CPU cache usage. By using huge pages for a KVM guest, less | ||
71 | memory is used for page tables and TLB (Translation Lookaside Buffer) | ||
72 | misses are reduced, thereby significantly increasing performance, | ||
73 | especially for memory-intensive situations.</para> | ||
74 | |||
75 | <programlisting>-object memory-backend-file,id=mem,size=4096M,mem-path=/dev/hugepages,share=on \</programlisting> | ||
76 | </listitem> | ||
77 | |||
78 | <listitem> | ||
79 | <para>Memory preallocation - preallocating huge pages at startup time | ||
80 | can improve performance, but it may affect the qemu boot time.</para> | ||
81 | |||
82 | <programlisting>-mem-prealloc \</programlisting> | ||
83 | </listitem> | ||
84 | |||
85 | <listitem> | ||
86 | <para>Enable realtime characteristics - run qemu with realtime | ||
87 | features. While that mildly implies that "-realtime" alone might do | ||
88 | something, it's just an identifier for options that are partially | ||
89 | realtime. If you're running in a realtime or low latency environment, | ||
90 | you don't want your pages to be swapped out and mlock does that, thus | ||
91 | mlock=on. If you want VM density, then you may want swappable VMs, | ||
92 | thus mlock=off.</para> | ||
93 | |||
94 | <programlisting>-realtime mlock=on \</programlisting> | ||
95 | </listitem> | ||
96 | </itemizedlist> | ||
97 | |||
98 | <para>If the hardware does not have an IOMMU (known as "Intel VT-d" on | ||
99 | Intel-based machines and "AMD I/O Virtualization Technology" on AMD-based | ||
100 | machines), it will not be possible to assign devices in KVM. | ||
101 | Virtualization Technology features (VT-d, VT-x, etc.) must be enabled from | ||
102 | BIOS on the host target before starting a virtual machine.</para> | ||
103 | </section> | ||
104 | |||
105 | <section id="net_in_guest"> | ||
106 | <title>Networking in guest</title> | ||
107 | |||
108 | <section id="vhost-user-support"> | ||
109 | <title>Using vhost-user support</title> | ||
110 | |||
111 | <para>The goal of vhost-user is to implement a Virtio transport, staying | ||
112 | as close as possible to the vhost paradigm of using shared memory, | ||
113 | ioeventfds and irqfds. A UNIX domain socket based mechanism allows the | ||
114 | set up of resources used by a number of Vrings shared between two | ||
115 | userspace processes, which will be placed in shared memory.</para> | ||
116 | |||
117 | <para>To run QEMU with the vhost-user backend, you have to provide the | ||
118 | named UNIX domain socket which needs to be already opened by the | ||
119 | backend:</para> | ||
120 | |||
121 | <programlisting>-object memory-backend-file,id=mem,size=4096M,mem-path=/dev/hugepages,share=on \ | ||
122 | -chardev socket,id=char0,path=/var/run/openvswitch/vhost-user1 \ | ||
123 | -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce \ | ||
124 | -device virtio-net-pci,netdev=mynet1,mac=52:54:00:00:00:01 \</programlisting> | ||
125 | |||
126 | <para>The vHost User standard uses a client-server model. The server | ||
127 | creates and manages the vHost User sockets and the client connects to | ||
128 | the sockets created by the server. It is recommended to use QEMU as | ||
129 | server so the vhost-user client can be restarted without affecting the | ||
130 | server, otherwise if the server side dies all clients need to be | ||
131 | restarted.</para> | ||
132 | |||
133 | <para>Using vhost-user in QEMU as server will offer the flexibility to | ||
134 | stop and start the virtual machine with no impact on virtual switch from | ||
135 | the host (vhost-user-client).</para> | ||
136 | |||
137 | <programlisting>-chardev socket,id=char0,path=/var/run/openvswitch/vhost-user1,server \</programlisting> | ||
138 | </section> | ||
139 | |||
140 | <section id="tap-interface"> | ||
141 | <title>Using TAP Interfaces</title> | ||
142 | |||
143 | <para>QEMU can use TAP interfaces to provide full networking capability | ||
144 | for the guest OS:</para> | ||
145 | |||
146 | <programlisting>-netdev tap,id=net0,ifname=tap0,script=no,downscript=no \ | ||
147 | -device virtio-net-pci,netdev=net0,mac=22:EA:FB:A8:25:AE \</programlisting> | ||
148 | </section> | ||
149 | |||
150 | <section id="vfio-passthrough"> | ||
151 | <title>VFIO passthrough VF (SR-IOV) to guest</title> | ||
152 | |||
153 | <para>The KVM hypervisor supports attaching PCI devices on the host | ||
154 | system to guests. PCI passthrough allows guests to have exclusive access | ||
155 | to PCI devices for a range of tasks. PCI passthrough allows PCI devices | ||
156 | to appear and behave as if they were physically attached to the guest | ||
157 | operating system.</para> | ||
158 | |||
159 | <para>Preparing an Intel system for PCI passthrough:</para> | ||
160 | |||
161 | <itemizedlist> | ||
162 | <listitem> | ||
163 | <para>Enable the Intel VT-d extensions in BIOS</para> | ||
164 | </listitem> | ||
165 | |||
166 | <listitem> | ||
167 | <para>Activate Intel VT-d in the kernel by using | ||
168 | <literal>intel_iommu=on</literal> as a kernel boot parameter</para> | ||
169 | </listitem> | ||
170 | |||
171 | <listitem> | ||
172 | <para>Allow unsafe interrupts in case the system doesn't support | ||
173 | interrupt remapping. This can be done using | ||
174 | <literal>vfio_iommu_type1.allow_unsafe_interrupts=1</literal> as a | ||
175 | boot kernel parameter.</para> | ||
176 | </listitem> | ||
177 | </itemizedlist> | ||
178 | |||
179 | <para>Create guest with direct passthrough via VFIO framework like | ||
180 | so:</para> | ||
181 | |||
182 | <programlisting>-device vfio-pci,host=0000:03:10.2 \</programlisting> | ||
183 | |||
184 | <para>On the host, one or more Virtual Functions (VFs) must be created in | ||
185 | order to be allocated for a guest network to access, before starting | ||
186 | QEMU:</para> | ||
187 | |||
188 | <programlisting>$ echo 2 > /sys/class/net/eno3/device/sriov_numvfs | ||
189 | $ modprobe vfio_pci | ||
190 | $ dpdk-devbind --bind=vfio-pci 0000:03:10.2</programlisting> | ||
191 | </section> | ||
192 | |||
193 | <section id="multiqueue"> | ||
194 | <title>Multi-queue</title> | ||
195 | |||
196 | <para>In order to ensure that network performance scales as the number | ||
197 | of vCPUs increases, multi-queue support can be used in QEMU.</para> | ||
198 | |||
199 | <section id="qemu-multiqueue-support"> | ||
200 | <title>QEMU multi queue support configuration</title> | ||
201 | |||
202 | <programlisting>-chardev socket,id=char0,path=/var/run/openvswitch/vhost-user1 \ | ||
203 | -netdev type=vhost-user,id=net0,chardev=char0,queues=2 \ | ||
204 | -device virtio-net-pci,netdev=net0,mac=22:EA:FB:A8:25:AE,mq=on,vectors=6 | ||
205 | where vectors is calculated as: 2 + 2 * queues number.</programlisting> | ||
206 | </section> | ||
207 | |||
208 | <section id="inside-guest"> | ||
209 | <title>Inside guest</title> | ||
210 | |||
211 | <para>Linux kernel virtio-net driver (one queue is enabled by | ||
212 | default):</para> | ||
213 | |||
214 | <programlisting>$ ethtool -L combined 2 eth0 | ||
215 | DPDK Virtio PMD | ||
216 | $ testpmd -c 0x7 -- -i --rxq=2 --txq=2 --nb-cores=2 ...</programlisting> | ||
217 | |||
218 | <para>For QEMU documentation please see: <ulink | ||
219 | url="https://qemu.weilnetz.de/doc/qemu-doc.html">https://qemu.weilnetz.de/doc/qemu-doc.html</ulink>.</para> | ||
220 | </section> | ||
221 | </section> | ||
222 | </section> | ||
223 | |||
224 | <section id="libvirt"> | ||
225 | <title>Libvirt</title> | ||
226 | |||
227 | <para>One way to manage guests in Enea NFV Access is by using | ||
228 | <literal>libvirt</literal>. Libvirt is used in conjunction with a daemon | ||
229 | (<literal>libvirtd</literal>) and a command line utility (virsh) to manage | ||
230 | virtualized environments.</para> | ||
231 | |||
232 | <para>The libvirt library is a hypervisor-independent virtualization API | ||
233 | and toolkit that is able to interact with the virtualization capabilities | ||
234 | of a range of operating systems. Libvirt provides a common, generic and | ||
235 | stable layer to securely manage domains on a node. As nodes may be | ||
236 | remotely located, libvirt provides all methods required to provision, | ||
237 | create, modify, monitor, control, migrate and stop the domains, within the | ||
238 | limits of hypervisor support for these operations.</para> | ||
239 | |||
240 | <para>The libvirt daemon runs on the Enea NFV Access host. All tools built | ||
241 | on libvirt API connect to the daemon to request the desired operation, and | ||
242 | to collect information about the configuration and resources of the host | ||
243 | system and guests. <literal>virsh</literal> is a command line interface | ||
244 | tool for managing guests and the hypervisor. The virsh tool is built on | ||
245 | the libvirt management API.</para> | ||
246 | |||
247 | <para><emphasis role="bold">Major functionality provided by | ||
248 | libvirt</emphasis></para> | ||
249 | |||
250 | <para>The following is a summary from the libvirt <ulink | ||
251 | url="http://wiki.libvirt.org/page/FAQ#What_is_libvirt.3F">home | ||
252 | page</ulink> describing the major libvirt features:</para> | ||
253 | |||
254 | <itemizedlist> | ||
255 | <listitem> | ||
256 | <para><emphasis role="bold">VM management:</emphasis> Various domain | ||
257 | lifecycle operations such as start, stop, pause, save, restore, and | ||
258 | migrate. Hotplug operations for many device types including disk and | ||
259 | network interfaces, memory, and cpus.</para> | ||
260 | </listitem> | ||
261 | |||
262 | <listitem> | ||
263 | <para><emphasis role="bold">Remote machine support:</emphasis> All | ||
264 | libvirt functionality is accessible on any machine running the libvirt | ||
265 | daemon, including remote machines. A variety of network transports are | ||
266 | supported for connecting remotely, with the simplest being | ||
267 | <literal>SSH</literal>, which requires no extra explicit | ||
268 | configuration. For more information, see: <ulink | ||
269 | url="http://libvirt.org/remote.html">http://libvirt.org/remote.html</ulink>.</para> | ||
270 | </listitem> | ||
271 | |||
272 | <listitem> | ||
273 | <para><emphasis role="bold">Network interface management:</emphasis> | ||
274 | Any host running the libvirt daemon can be used to manage physical and | ||
275 | logical network interfaces. Enumerate existing interfaces, as well as | ||
276 | configure (and create) interfaces, bridges, vlans, and bond devices. | ||
277 | For more details see: <ulink | ||
278 | url="https://fedorahosted.org/netcf/">https://fedorahosted.org/netcf/</ulink>.</para> | ||
279 | </listitem> | ||
280 | |||
281 | <listitem> | ||
282 | <para><emphasis role="bold">Virtual NAT and Route based | ||
283 | networking:</emphasis> Any host running the libvirt daemon can manage | ||
284 | and create virtual networks. Libvirt virtual networks use firewall | ||
285 | rules to act as a router, providing VMs transparent access to the host | ||
286 | machines network. For more information, see: <ulink | ||
287 | url="http://libvirt.org/archnetwork.html">http://libvirt.org/archnetwork.html</ulink>.</para> | ||
288 | </listitem> | ||
289 | |||
290 | <listitem> | ||
291 | <para><emphasis role="bold">Storage management:</emphasis> Any host | ||
292 | running the libvirt daemon can be used to manage various types of | ||
293 | storage: create file images of various formats (raw, qcow2, etc.), | ||
294 | mount NFS shares, enumerate existing LVM volume groups, create new LVM | ||
295 | volume groups and logical volumes, partition raw disk devices, mount | ||
296 | iSCSI shares, and much more. For more details, see: <ulink | ||
297 | url="http://libvirt.org/storage.html">http://libvirt.org/storage.html</ulink>.</para> | ||
298 | </listitem> | ||
299 | |||
300 | <listitem> | ||
301 | <para><emphasis role="bold">Libvirt Configuration:</emphasis> A | ||
302 | properly running libvirt requires that the following elements be in | ||
303 | place:</para> | ||
304 | |||
305 | <itemizedlist> | ||
306 | <listitem> | ||
307 | <para>Configuration files, located in the directory | ||
308 | <literal>/etc/libvirt</literal>. They include the daemon's | ||
309 | configuration file <literal>libvirtd.conf</literal>, and | ||
310 | hypervisor-specific configuration files, like | ||
311 | <literal>qemu.conf</literal> for the QEMU.</para> | ||
312 | </listitem> | ||
313 | |||
314 | <listitem> | ||
315 | <para>A running libvirtd daemon. The daemon is started | ||
316 | automatically on the Enea NFV Access host.</para> | ||
317 | </listitem> | ||
318 | |||
319 | <listitem> | ||
320 | <para>Configuration files for the libvirt domains, or guests, to | ||
321 | be managed by the KVM host. The specifics for guest domains shall | ||
322 | be defined in an XML file of a format specified at <ulink | ||
323 | url="http://libvirt.org/formatdomain.html">http://libvirt.org/formatdomain.html</ulink>. | ||
324 | XML formats for other structures are specified at <ulink type="" | ||
325 | url="http://libvirt.org/format.html">http://libvirt.org/format.html</ulink>.</para> | ||
326 | </listitem> | ||
327 | </itemizedlist> | ||
328 | </listitem> | ||
329 | </itemizedlist> | ||
330 | |||
331 | <section id="boot-kvm-guest"> | ||
332 | <title>Booting a KVM Guest</title> | ||
333 | |||
334 | <para>There are several ways to boot a KVM guest. Here we describe how | ||
335 | to boot using a raw image. A direct kernel boot can be performed by | ||
336 | transferring the guest kernel and the file system files to the host and | ||
337 | specifying a <literal><kernel></literal> and an | ||
338 | <literal><initrd></literal> element inside the | ||
339 | <literal><os></literal> element of the guest XML file, as in the | ||
340 | following example:</para> | ||
341 | |||
342 | <programlisting><os> | ||
343 | <kernel>bzImage</kernel> | ||
344 | </os> | ||
345 | <devices> | ||
346 | <disk type='file' device='disk'> | ||
347 | <driver name='qemu' type='raw' cache='none'/> | ||
348 | <source file='enea-nfv-access-guest-qemux86-64.ext4'/> | ||
349 | <target dev='vda' bus='virtio'/> | ||
350 | </disk> | ||
351 | </devices></programlisting> | ||
352 | </section> | ||
353 | |||
354 | <section id="start-guest"> | ||
355 | <title>Starting a Guest</title> | ||
356 | |||
357 | <para>Command <command>virsh create</command> starts a guest:</para> | ||
358 | |||
359 | <programlisting>virsh create example-guest-x86.xml</programlisting> | ||
360 | |||
361 | <para>If further configurations are needed before the guest is reachable | ||
362 | through <literal>ssh</literal>, a console can be started using command | ||
363 | <command>virsh console</command>. The example below shows how to start a | ||
364 | console where kvm-example-guest is the name of the guest defined in the | ||
365 | guest XML file:</para> | ||
366 | |||
367 | <programlisting>virsh console kvm-example-guest</programlisting> | ||
368 | |||
369 | <para>This requires that the guest domain has a console configured in | ||
370 | the guest XML file:</para> | ||
371 | |||
372 | <programlisting><os> | ||
373 | <cmdline>console=ttyS0,115200</cmdline> | ||
374 | </os> | ||
375 | <devices> | ||
376 | <console type='pty'> | ||
377 | <target type='serial' port='0'/> | ||
378 | </console> | ||
379 | </devices></programlisting> | ||
380 | </section> | ||
381 | |||
382 | <section id="isolation"> | ||
383 | <title>Isolation</title> | ||
384 | |||
385 | <para>It may be desirable to isolate execution in a guest, to a specific | ||
386 | guest core. It might also be desirable to run a guest on a specific host | ||
387 | core.</para> | ||
388 | |||
389 | <para>To pin the virtual CPUs of the guest to specific cores, configure | ||
390 | the <literal><cputune></literal> contents as follows:</para> | ||
391 | |||
392 | <orderedlist> | ||
393 | <listitem> | ||
394 | <para>First explicitly state on which host core each guest core | ||
395 | shall run, by mapping <literal>vcpu</literal> to | ||
396 | <literal>cpuset</literal> in the <literal><vcpupin></literal> | ||
397 | tag.</para> | ||
398 | </listitem> | ||
399 | |||
400 | <listitem> | ||
401 | <para>In the <literal><cputune></literal> tag it is further | ||
402 | possible to specify on which CPU the emulator shall run by adding | ||
403 | the cpuset to the <literal><emulatorpin></literal> tag.</para> | ||
404 | |||
405 | <programlisting><vcpu placement='static'>2</vcpu> | ||
406 | <cputune> | ||
407 | <vcpupin vcpu='0' cpuset='2'/> | ||
408 | <vcpupin vcpu='1' cpuset='3'/> | ||
409 | <emulatorpin cpuset="2"/> | ||
410 | </cputune></programlisting> | ||
411 | |||
412 | <para><literal>libvirt</literal> will group all threads belonging to | ||
413 | a qemu instance into cgroups that will be created for that purpose. | ||
414 | It is possible to supply a base name for those cgroups using the | ||
415 | <literal><resource></literal> tag:</para> | ||
416 | |||
417 | <programlisting><resource> | ||
418 | <partition>/rt</partition> | ||
419 | </resource></programlisting> | ||
420 | </listitem> | ||
421 | </orderedlist> | ||
422 | </section> | ||
423 | |||
424 | <section id="network-libvirt"> | ||
425 | <title>Networking using libvirt</title> | ||
426 | |||
427 | <para>Command <command>virsh net-create</command> starts a network. If | ||
428 | any networks are listed in the guest XML file, those networks must be | ||
429 | started before the guest is started. As an example, if the network is | ||
430 | defined in a file named example-net.xml, it is started as | ||
431 | follows:</para> | ||
432 | |||
433 | <programlisting>virsh net-create example-net.xml | ||
434 | <network> | ||
435 | <name>sriov</name> | ||
436 | <forward mode='hostdev' managed='yes'> | ||
437 | <pf dev='eno3'/> | ||
438 | </forward> | ||
439 | </network></programlisting> | ||
440 | |||
441 | <para><literal>libvirt</literal> is a virtualization API that supports | ||
442 | virtual network creation. These networks can be connected to guests and | ||
443 | containers by referencing the network in the guest XML file. It is | ||
444 | possible to have a virtual network persistently running on the host by | ||
445 | starting the network with command <command>virsh net-define</command> | ||
446 | instead of the previously mentioned <command>virsh | ||
447 | net-create</command>.</para> | ||
448 | |||
449 | <para>An example for the sample network defined in | ||
450 | <literal>meta-vt/recipes-example/virt-example/files/example-net.xml</literal>:</para> | ||
451 | |||
452 | <programlisting>virsh net-define example-net.xml</programlisting> | ||
453 | |||
454 | <para>Command <command>virsh net-autostart</command> enables a | ||
455 | persistent network to start automatically when the libvirt daemon | ||
456 | starts:</para> | ||
457 | |||
458 | <programlisting>virsh net-autostart example-net</programlisting> | ||
459 | |||
460 | <para>The guest configuration file (XML) must be updated to access the | ||
461 | newly created network like so:</para> | ||
462 | |||
463 | <programlisting> <interface type='network'> | ||
464 | <source network='sriov'/> | ||
465 | </interface></programlisting> | ||
466 | |||
467 | <para>Presented here are a few modes of network access | ||
468 | from the guest using <command>virsh</command>:</para> | ||
469 | |||
470 | <itemizedlist> | ||
471 | <listitem> | ||
472 | <para><emphasis role="bold">vhost-user interface</emphasis></para> | ||
473 | |||
474 | <para>See the Open vSwitch chapter on how to create vhost-user | ||
475 | interface using Open vSwitch. Currently there is no Open vSwitch | ||
476 | support for networks that are managed by libvirt (e.g. NAT). As of | ||
477 | now, only bridged networks are supported (those where the user has | ||
478 | to manually create the bridge).</para> | ||
479 | |||
480 | <programlisting> <interface type='vhostuser'> | ||
481 | <mac address='00:00:00:00:00:01'/> | ||
482 | <source type='unix' path='/var/run/openvswitch/vhost-user1' mode='client'/> | ||
483 | <model type='virtio'/> | ||
484 | <driver queues='1'> | ||
485 | <host mrg_rxbuf='off'/> | ||
486 | </driver> | ||
487 | </interface></programlisting> | ||
488 | </listitem> | ||
489 | |||
490 | <listitem> | ||
491 | <para><emphasis role="bold">PCI passthrough | ||
492 | (SR-IOV)</emphasis></para> | ||
493 | |||
494 | <para>The KVM hypervisor supports attaching PCI devices on the host | ||
495 | system to guests. PCI passthrough allows guests to have exclusive | ||
496 | access to PCI devices for a range of tasks. PCI passthrough allows | ||
497 | PCI devices to appear and behave as if they were physically attached | ||
498 | to the guest operating system.</para> | ||
499 | |||
500 | <para>Preparing an Intel system for PCI passthrough is done like | ||
501 | so:</para> | ||
502 | |||
503 | <itemizedlist> | ||
504 | <listitem> | ||
505 | <para>Enable the Intel VT-d extensions in BIOS</para> | ||
506 | </listitem> | ||
507 | |||
508 | <listitem> | ||
509 | <para>Activate Intel VT-d in the kernel by using | ||
510 | <literal>intel_iommu=on</literal> as a kernel boot | ||
511 | parameter</para> | ||
512 | </listitem> | ||
513 | |||
514 | <listitem> | ||
515 | <para>Allow unsafe interrupts in case the system doesn't support | ||
516 | interrupt remapping. This can be done using | ||
517 | <literal>vfio_iommu_type1.allow_unsafe_interrupts=1</literal> as | ||
518 | a boot kernel parameter.</para> | ||
519 | </listitem> | ||
520 | </itemizedlist> | ||
521 | |||
522 | <para>VFs must be created on the host before starting the | ||
523 | guest:</para> | ||
524 | |||
525 | <programlisting>$ echo 2 > /sys/class/net/eno3/device/sriov_numvfs | ||
526 | $ modprobe vfio_pci | ||
527 | $ dpdk-devbind.py --bind=vfio-pci 0000:03:10.0 | ||
528 | <interface type='hostdev' managed='yes'> | ||
529 | <source> | ||
530 | <address type='pci' domain='0x0' bus='0x03' slot='0x10' function='0x0'/> | ||
531 | </source> | ||
532 | <mac address='52:54:00:6d:90:02'/> | ||
533 | </interface></programlisting> | ||
534 | </listitem> | ||
535 | |||
536 | <listitem> | ||
537 | <para><emphasis role="bold">Bridge interface</emphasis></para> | ||
538 | |||
539 | <para>In case an OVS bridge exists on host, it can be used to | ||
540 | connect the guest:</para> | ||
541 | |||
542 | <programlisting> <interface type='bridge'> | ||
543 | <mac address='52:54:00:71:b1:b6'/> | ||
544 | <source bridge='ovsbr0'/> | ||
545 | <virtualport type='openvswitch'/> | ||
546 | <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> | ||
547 | </interface></programlisting> | ||
548 | |||
549 | <para>For further details on the network XML format, see <ulink | ||
550 | url="http://libvirt.org/formatnetwork.html">http://libvirt.org/formatnetwork.html</ulink>.</para> | ||
551 | </listitem> | ||
552 | </itemizedlist> | ||
553 | </section> | ||
554 | |||
555 | <section id="libvirt-guest-config-ex"> | ||
556 | <title>Libvirt guest configuration examples</title> | ||
557 | |||
558 | <section id="guest-config-vhost-user-interface"> | ||
559 | <title>Guest configuration with vhost-user interface</title> | ||
560 | |||
561 | <programlisting><domain type='kvm'> | ||
562 | <name>vm_vhost</name> | ||
563 | <uuid>4a9b3f53-fa2a-47f3-a757-dd87720d9d1d</uuid> | ||
564 | <memory unit='KiB'>4194304</memory> | ||
565 | <currentMemory unit='KiB'>4194304</currentMemory> | ||
566 | <memoryBacking> | ||
567 | <hugepages> | ||
568 | <page size='1' unit='G' nodeset='0'/> | ||
569 | </hugepages> | ||
570 | </memoryBacking> | ||
571 | <vcpu placement='static'>2</vcpu> | ||
572 | <cputune> | ||
573 | <shares>4096</shares> | ||
574 | <vcpupin vcpu='0' cpuset='4'/> | ||
575 | <vcpupin vcpu='1' cpuset='5'/> | ||
576 | <emulatorpin cpuset='4,5'/> | ||
577 | </cputune> | ||
578 | <os> | ||
579 | <type arch='x86_64' machine='pc'>hvm</type> | ||
580 | <kernel>/mnt/qemu/bzImage</kernel> | ||
581 | <cmdline>root=/dev/vda console=ttyS0,115200</cmdline> | ||
582 | <boot dev='hd'/> | ||
583 | </os> | ||
584 | <features> | ||
585 | <acpi/> | ||
586 | <apic/> | ||
587 | </features> | ||
588 | <cpu mode='host-model'> | ||
589 | <model fallback='allow'/> | ||
590 | <topology sockets='2' cores='1' threads='1'/> | ||
591 | <numa> | ||
592 | <cell id='0' cpus='0-1' memory='4194304' unit='KiB' memAccess='shared'/> | ||
593 | </numa> | ||
594 | </cpu> | ||
595 | <on_poweroff>destroy</on_poweroff> | ||
596 | <on_reboot>restart</on_reboot> | ||
597 | <on_crash>destroy</on_crash> | ||
598 | <devices> | ||
599 | <emulator>/usr/bin/qemu-system-x86_64</emulator> | ||
600 | <disk type='file' device='disk'> | ||
601 | <driver name='qemu' type='raw' cache='none'/> | ||
602 | <source file='/mnt/qemu/enea-nfv-access-guest-qemux86-64.ext4'/> | ||
603 | <target dev='vda' bus='virtio'/> | ||
604 | </disk> | ||
605 | <interface type='vhostuser'> | ||
606 | <mac address='00:00:00:00:00:01'/> | ||
607 | <source type='unix' path='/var/run/openvswitch/vhost-user1' mode='client'/> | ||
608 | <model type='virtio'/> | ||
609 | <driver queues='1'> | ||
610 | <host mrg_rxbuf='off'/> | ||
611 | </driver> | ||
612 | </interface> | ||
613 | <serial type='pty'> | ||
614 | <target port='0'/> | ||
615 | </serial> | ||
616 | <console type='pty'> | ||
617 | <target type='serial' port='0'/> | ||
618 | </console> | ||
619 | </devices> | ||
620 | </domain></programlisting> | ||
621 | </section> | ||
622 | |||
623 | <section id="guest-config-pci-passthrough"> | ||
624 | <title>Guest configuration with PCI passthrough</title> | ||
625 | |||
626 | <programlisting><domain type='kvm'> | ||
627 | <name>vm_sriov1</name> | ||
628 | <uuid>4a9b3f53-fa2a-47f3-a757-dd87720d9d1d</uuid> | ||
629 | <memory unit='KiB'>4194304</memory> | ||
630 | <currentMemory unit='KiB'>4194304</currentMemory> | ||
631 | <memoryBacking> | ||
632 | <hugepages> | ||
633 | <page size='1' unit='G' nodeset='0'/> | ||
634 | </hugepages> | ||
635 | </memoryBacking> | ||
636 | <vcpu>2</vcpu> | ||
637 | <os> | ||
638 | <type arch='x86_64' machine='q35'>hvm</type> | ||
639 | <kernel>/mnt/qemu/bzImage</kernel> | ||
640 | <cmdline>root=/dev/vda console=ttyS0,115200</cmdline> | ||
641 | <boot dev='hd'/> | ||
642 | </os> | ||
643 | <features> | ||
644 | <acpi/> | ||
645 | <apic/> | ||
646 | </features> | ||
647 | <cpu mode='host-model'> | ||
648 | <model fallback='allow'/> | ||
649 | <topology sockets='1' cores='2' threads='1'/> | ||
650 | <numa> | ||
651 | <cell id='0' cpus='0' memory='4194304' unit='KiB' memAccess='shared'/> | ||
652 | </numa> | ||
653 | </cpu> | ||
654 | <on_poweroff>destroy</on_poweroff> | ||
655 | <on_reboot>restart</on_reboot> | ||
656 | <on_crash>destroy</on_crash> | ||
657 | <devices> | ||
658 | <emulator>/usr/bin/qemu-system-x86_64</emulator> | ||
659 | <disk type='file' device='disk'> | ||
660 | <driver name='qemu' type='raw' cache='none'/> | ||
661 | <source file='/mnt/qemu/enea-nfv-access-guest-qemux86-64.ext4'/> | ||
662 | <target dev='vda' bus='virtio'/> | ||
663 | </disk> | ||
664 | <interface type='hostdev' managed='yes'> | ||
665 | <source> | ||
666 | <address type='pci' domain='0x0' bus='0x03' slot='0x10' function='0x0'/> | ||
667 | </source> | ||
668 | <mac address='52:54:00:6d:90:02'/> | ||
669 | </interface> | ||
670 | <serial type='pty'> | ||
671 | <target port='0'/> | ||
672 | </serial> | ||
673 | <console type='pty'> | ||
674 | <target type='serial' port='0'/> | ||
675 | </console> | ||
676 | </devices> | ||
677 | </domain></programlisting> | ||
678 | </section> | ||
679 | |||
680 | <section id="guest-config-bridge-interface"> | ||
681 | <title>Guest configuration with bridge interface</title> | ||
682 | |||
683 | <programlisting><domain type='kvm'> | ||
684 | <name>vm_bridge</name> | ||
685 | <uuid>4a9b3f53-fa2a-47f3-a757-dd87720d9d1d</uuid> | ||
686 | <memory unit='KiB'>4194304</memory> | ||
687 | <currentMemory unit='KiB'>4194304</currentMemory> | ||
688 | <memoryBacking> | ||
689 | <hugepages> | ||
690 | <page size='1' unit='G' nodeset='0'/> | ||
691 | </hugepages> | ||
692 | </memoryBacking> | ||
693 | <vcpu placement='static'>2</vcpu> | ||
694 | <cputune> | ||
695 | <shares>4096</shares> | ||
696 | <vcpupin vcpu='0' cpuset='4'/> | ||
697 | <vcpupin vcpu='1' cpuset='5'/> | ||
698 | <emulatorpin cpuset='4,5'/> | ||
699 | </cputune> | ||
700 | <os> | ||
701 | <type arch='x86_64' machine='q35'>hvm</type> | ||
702 | <kernel>/mnt/qemu/bzImage</kernel> | ||
703 | <cmdline>root=/dev/vda console=ttyS0,115200</cmdline> | ||
704 | <boot dev='hd'/> | ||
705 | </os> | ||
706 | <features> | ||
707 | <acpi/> | ||
708 | <apic/> | ||
709 | </features> | ||
710 | <cpu mode='host-model'> | ||
711 | <model fallback='allow'/> | ||
712 | <topology sockets='2' cores='1' threads='1'/> | ||
713 | <numa> | ||
714 | <cell id='0' cpus='0-1' memory='4194304' unit='KiB' memAccess='shared'/> | ||
715 | </numa> | ||
716 | </cpu> | ||
717 | <on_poweroff>destroy</on_poweroff> | ||
718 | <on_reboot>restart</on_reboot> | ||
719 | <on_crash>destroy</on_crash> | ||
720 | <devices> | ||
721 | <emulator>/usr/bin/qemu-system-x86_64</emulator> | ||
722 | <disk type='file' device='disk'> | ||
723 | <driver name='qemu' type='raw' cache='none'/> | ||
724 | <source file='/mnt/qemu/enea-nfv-access-guest-qemux86-64.ext4'/> | ||
725 | <target dev='vda' bus='virtio'/> | ||
726 | </disk> | ||
727 | <interface type='bridge'> | ||
728 | <mac address='52:54:00:71:b1:b6'/> | ||
729 | <source bridge='ovsbr0'/> | ||
730 | <virtualport type='openvswitch'/> | ||
731 | <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> | ||
732 | </interface> | ||
733 | <serial type='pty'> | ||
734 | <target port='0'/> | ||
735 | </serial> | ||
736 | <console type='pty'> | ||
737 | <target type='serial' port='0'/> | ||
738 | </console> | ||
739 | </devices> | ||
740 | </domain></programlisting> | ||
741 | </section> | ||
742 | </section> | ||
743 | </section> | ||
744 | </chapter> | ||