diff options
| author | Adrian Dudau <adrian.dudau@enea.com> | 2014-06-26 14:36:22 +0200 |
|---|---|---|
| committer | Adrian Dudau <adrian.dudau@enea.com> | 2014-06-26 15:32:53 +0200 |
| commit | f4cf9fe05bb3f32fabea4e54dd92d368967a80da (patch) | |
| tree | 487180fa9866985ea7b28e625651765d86f515c3 /documentation/profile-manual | |
| download | poky-f4cf9fe05bb3f32fabea4e54dd92d368967a80da.tar.gz | |
initial commit for Enea Linux 4.0
Migrated from the internal git server on the daisy-enea branch
Signed-off-by: Adrian Dudau <adrian.dudau@enea.com>
Diffstat (limited to 'documentation/profile-manual')
40 files changed, 4977 insertions, 0 deletions
diff --git a/documentation/profile-manual/figures/kernelshark-all.png b/documentation/profile-manual/figures/kernelshark-all.png new file mode 100644 index 0000000000..99b40bafe5 --- /dev/null +++ b/documentation/profile-manual/figures/kernelshark-all.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/kernelshark-choose-events.png b/documentation/profile-manual/figures/kernelshark-choose-events.png new file mode 100644 index 0000000000..e8dd62a571 --- /dev/null +++ b/documentation/profile-manual/figures/kernelshark-choose-events.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/kernelshark-i915-display.png b/documentation/profile-manual/figures/kernelshark-i915-display.png new file mode 100644 index 0000000000..bb0edfb7fd --- /dev/null +++ b/documentation/profile-manual/figures/kernelshark-i915-display.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/kernelshark-output-display.png b/documentation/profile-manual/figures/kernelshark-output-display.png new file mode 100644 index 0000000000..ae2d0e5730 --- /dev/null +++ b/documentation/profile-manual/figures/kernelshark-output-display.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/lttngmain0.png b/documentation/profile-manual/figures/lttngmain0.png new file mode 100644 index 0000000000..5f60113cc3 --- /dev/null +++ b/documentation/profile-manual/figures/lttngmain0.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/oprofileui-busybox.png b/documentation/profile-manual/figures/oprofileui-busybox.png new file mode 100644 index 0000000000..a8275c65d2 --- /dev/null +++ b/documentation/profile-manual/figures/oprofileui-busybox.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/oprofileui-copy-to-user.png b/documentation/profile-manual/figures/oprofileui-copy-to-user.png new file mode 100644 index 0000000000..deb6470204 --- /dev/null +++ b/documentation/profile-manual/figures/oprofileui-copy-to-user.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/oprofileui-downloading.png b/documentation/profile-manual/figures/oprofileui-downloading.png new file mode 100644 index 0000000000..57742d6723 --- /dev/null +++ b/documentation/profile-manual/figures/oprofileui-downloading.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/oprofileui-processes.png b/documentation/profile-manual/figures/oprofileui-processes.png new file mode 100644 index 0000000000..ae547028f4 --- /dev/null +++ b/documentation/profile-manual/figures/oprofileui-processes.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-probe-do_fork-profile.png b/documentation/profile-manual/figures/perf-probe-do_fork-profile.png new file mode 100644 index 0000000000..1a1070deb8 --- /dev/null +++ b/documentation/profile-manual/figures/perf-probe-do_fork-profile.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-report-cycles-u.png b/documentation/profile-manual/figures/perf-report-cycles-u.png new file mode 100644 index 0000000000..68ec6af80b --- /dev/null +++ b/documentation/profile-manual/figures/perf-report-cycles-u.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-systemwide-libc.png b/documentation/profile-manual/figures/perf-systemwide-libc.png new file mode 100644 index 0000000000..2b72869c77 --- /dev/null +++ b/documentation/profile-manual/figures/perf-systemwide-libc.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-systemwide.png b/documentation/profile-manual/figures/perf-systemwide.png new file mode 100644 index 0000000000..12ce2444ae --- /dev/null +++ b/documentation/profile-manual/figures/perf-systemwide.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-wget-busybox-annotate-menu.png b/documentation/profile-manual/figures/perf-wget-busybox-annotate-menu.png new file mode 100644 index 0000000000..ceb34eaead --- /dev/null +++ b/documentation/profile-manual/figures/perf-wget-busybox-annotate-menu.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-wget-busybox-annotate-udhcpc.png b/documentation/profile-manual/figures/perf-wget-busybox-annotate-udhcpc.png new file mode 100644 index 0000000000..3581e9daa6 --- /dev/null +++ b/documentation/profile-manual/figures/perf-wget-busybox-annotate-udhcpc.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-wget-busybox-debuginfo.png b/documentation/profile-manual/figures/perf-wget-busybox-debuginfo.png new file mode 100644 index 0000000000..c317b49a4e --- /dev/null +++ b/documentation/profile-manual/figures/perf-wget-busybox-debuginfo.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-wget-busybox-dso-zoom-menu.png b/documentation/profile-manual/figures/perf-wget-busybox-dso-zoom-menu.png new file mode 100644 index 0000000000..1913c867d0 --- /dev/null +++ b/documentation/profile-manual/figures/perf-wget-busybox-dso-zoom-menu.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-wget-busybox-dso-zoom.png b/documentation/profile-manual/figures/perf-wget-busybox-dso-zoom.png new file mode 100644 index 0000000000..a1962c437a --- /dev/null +++ b/documentation/profile-manual/figures/perf-wget-busybox-dso-zoom.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-wget-busybox-expanded-stripped.png b/documentation/profile-manual/figures/perf-wget-busybox-expanded-stripped.png new file mode 100644 index 0000000000..b642d06c8b --- /dev/null +++ b/documentation/profile-manual/figures/perf-wget-busybox-expanded-stripped.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-wget-flat-stripped.png b/documentation/profile-manual/figures/perf-wget-flat-stripped.png new file mode 100644 index 0000000000..c8f395ab53 --- /dev/null +++ b/documentation/profile-manual/figures/perf-wget-flat-stripped.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-wget-g-copy-from-user-expanded-stripped.png b/documentation/profile-manual/figures/perf-wget-g-copy-from-user-expanded-stripped.png new file mode 100644 index 0000000000..bb7c764ce0 --- /dev/null +++ b/documentation/profile-manual/figures/perf-wget-g-copy-from-user-expanded-stripped.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-wget-g-copy-to-user-expanded-debuginfo.png b/documentation/profile-manual/figures/perf-wget-g-copy-to-user-expanded-debuginfo.png new file mode 100644 index 0000000000..a799af5127 --- /dev/null +++ b/documentation/profile-manual/figures/perf-wget-g-copy-to-user-expanded-debuginfo.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-wget-g-copy-to-user-expanded-stripped-unresolved-hidden.png b/documentation/profile-manual/figures/perf-wget-g-copy-to-user-expanded-stripped-unresolved-hidden.png new file mode 100644 index 0000000000..e91808ae40 --- /dev/null +++ b/documentation/profile-manual/figures/perf-wget-g-copy-to-user-expanded-stripped-unresolved-hidden.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/perf-wget-g-copy-to-user-expanded-stripped.png b/documentation/profile-manual/figures/perf-wget-g-copy-to-user-expanded-stripped.png new file mode 100644 index 0000000000..812302d0a8 --- /dev/null +++ b/documentation/profile-manual/figures/perf-wget-g-copy-to-user-expanded-stripped.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/profile-title.png b/documentation/profile-manual/figures/profile-title.png new file mode 100644 index 0000000000..ce5c682b58 --- /dev/null +++ b/documentation/profile-manual/figures/profile-title.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/pybootchartgui-linux-yocto.png b/documentation/profile-manual/figures/pybootchartgui-linux-yocto.png new file mode 100644 index 0000000000..2b6bfdacf9 --- /dev/null +++ b/documentation/profile-manual/figures/pybootchartgui-linux-yocto.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/pychart-linux-yocto-rpm-nostrip.png b/documentation/profile-manual/figures/pychart-linux-yocto-rpm-nostrip.png new file mode 100644 index 0000000000..444675c543 --- /dev/null +++ b/documentation/profile-manual/figures/pychart-linux-yocto-rpm-nostrip.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/pychart-linux-yocto-rpm.png b/documentation/profile-manual/figures/pychart-linux-yocto-rpm.png new file mode 100644 index 0000000000..8ee35352d8 --- /dev/null +++ b/documentation/profile-manual/figures/pychart-linux-yocto-rpm.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/sched-wakeup-profile.png b/documentation/profile-manual/figures/sched-wakeup-profile.png new file mode 100644 index 0000000000..2f25811889 --- /dev/null +++ b/documentation/profile-manual/figures/sched-wakeup-profile.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/sysprof-callers.png b/documentation/profile-manual/figures/sysprof-callers.png new file mode 100644 index 0000000000..640c8d9140 --- /dev/null +++ b/documentation/profile-manual/figures/sysprof-callers.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/sysprof-copy-from-user.png b/documentation/profile-manual/figures/sysprof-copy-from-user.png new file mode 100644 index 0000000000..8d31427824 --- /dev/null +++ b/documentation/profile-manual/figures/sysprof-copy-from-user.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/figures/sysprof-copy-to-user.png b/documentation/profile-manual/figures/sysprof-copy-to-user.png new file mode 100644 index 0000000000..7a5bab7991 --- /dev/null +++ b/documentation/profile-manual/figures/sysprof-copy-to-user.png | |||
| Binary files differ | |||
diff --git a/documentation/profile-manual/profile-manual-arch.xml b/documentation/profile-manual/profile-manual-arch.xml new file mode 100644 index 0000000000..19d1155229 --- /dev/null +++ b/documentation/profile-manual/profile-manual-arch.xml | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
| 2 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" | ||
| 3 | [<!ENTITY % poky SYSTEM "../poky.ent"> %poky; ] > | ||
| 4 | |||
| 5 | <chapter id='profile-manual-arch'> | ||
| 6 | |||
| 7 | <title>Overall Architecture of the Linux Tracing and Profiling Tools</title> | ||
| 8 | |||
| 9 | <section id='architecture-of-the-tracing-and-profiling-tools'> | ||
| 10 | <title>Architecture of the Tracing and Profiling Tools</title> | ||
| 11 | |||
| 12 | <para> | ||
| 13 | It may seem surprising to see a section covering an 'overall architecture' | ||
| 14 | for what seems to be a random collection of tracing tools that together | ||
| 15 | make up the Linux tracing and profiling space. | ||
| 16 | The fact is, however, that in recent years this seemingly disparate | ||
| 17 | set of tools has started to converge on a 'core' set of underlying | ||
| 18 | mechanisms: | ||
| 19 | </para> | ||
| 20 | |||
| 21 | <para> | ||
| 22 | <itemizedlist> | ||
| 23 | <listitem>static tracepoints</listitem> | ||
| 24 | <listitem>dynamic tracepoints | ||
| 25 | <itemizedlist> | ||
| 26 | <listitem>kprobes</listitem> | ||
| 27 | <listitem>uprobes</listitem> | ||
| 28 | </itemizedlist> | ||
| 29 | </listitem> | ||
| 30 | <listitem>the perf_events subsystem</listitem> | ||
| 31 | <listitem>debugfs</listitem> | ||
| 32 | </itemizedlist> | ||
| 33 | </para> | ||
| 34 | |||
| 35 | <informalexample> | ||
| 36 | <emphasis>Tying it Together:</emphasis> Rather than enumerating here how each tool makes use of | ||
| 37 | these common mechanisms, textboxes like this will make note of the | ||
| 38 | specific usages in each tool as they come up in the course | ||
| 39 | of the text. | ||
| 40 | </informalexample> | ||
| 41 | </section> | ||
| 42 | </chapter> | ||
| 43 | <!-- | ||
| 44 | vim: expandtab tw=80 ts=4 | ||
| 45 | --> | ||
diff --git a/documentation/profile-manual/profile-manual-customization.xsl b/documentation/profile-manual/profile-manual-customization.xsl new file mode 100644 index 0000000000..ead52ee7ac --- /dev/null +++ b/documentation/profile-manual/profile-manual-customization.xsl | |||
| @@ -0,0 +1,11 @@ | |||
| 1 | <?xml version='1.0'?> | ||
| 2 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns="http://www.w3.org/1999/xhtml" xmlns:fo="http://www.w3.org/1999/XSL/Format" version="1.0"> | ||
| 3 | |||
| 4 | <xsl:import href="http://docbook.sourceforge.net/release/xsl/current/xhtml/docbook.xsl" /> | ||
| 5 | |||
| 6 | <xsl:param name="html.stylesheet" select="'profile-manual-style.css'" /> | ||
| 7 | <xsl:param name="chapter.autolabel" select="1" /> | ||
| 8 | <xsl:param name="appendix.autolabel">A</xsl:param> | ||
| 9 | <xsl:param name="section.autolabel" select="1" /> | ||
| 10 | <xsl:param name="section.label.includes.component.label" select="1" /> | ||
| 11 | </xsl:stylesheet> | ||
diff --git a/documentation/profile-manual/profile-manual-eclipse-customization.xsl b/documentation/profile-manual/profile-manual-eclipse-customization.xsl new file mode 100644 index 0000000000..e4ff6e99ab --- /dev/null +++ b/documentation/profile-manual/profile-manual-eclipse-customization.xsl | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | <?xml version='1.0'?> | ||
| 2 | <xsl:stylesheet | ||
| 3 | xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | ||
| 4 | xmlns="http://www.w3.org/1999/xhtml" | ||
| 5 | xmlns:fo="http://www.w3.org/1999/XSL/Format" | ||
| 6 | version="1.0"> | ||
| 7 | |||
| 8 | <xsl:import | ||
| 9 | href="http://docbook.sourceforge.net/release/xsl/current/eclipse/eclipse3.xsl" /> | ||
| 10 | |||
| 11 | <xsl:param name="chunker.output.indent" select="'yes'"/> | ||
| 12 | <xsl:param name="chunk.quietly" select="1"/> | ||
| 13 | <xsl:param name="chunk.first.sections" select="1"/> | ||
| 14 | <xsl:param name="chunk.section.depth" select="10"/> | ||
| 15 | <xsl:param name="use.id.as.filename" select="1"/> | ||
| 16 | <xsl:param name="ulink.target" select="'_self'" /> | ||
| 17 | <xsl:param name="base.dir" select="'html/profile-manual/'"/> | ||
| 18 | <xsl:param name="html.stylesheet" select="'../book.css'"/> | ||
| 19 | <xsl:param name="eclipse.manifest" select="0"/> | ||
| 20 | <xsl:param name="create.plugin.xml" select="0"/> | ||
| 21 | <xsl:param name="suppress.navigation" select="1"/> | ||
| 22 | <xsl:param name="generate.index" select="0"/> | ||
| 23 | <xsl:param name="chapter.autolabel" select="1" /> | ||
| 24 | <xsl:param name="appendix.autolabel">A</xsl:param> | ||
| 25 | <xsl:param name="section.autolabel" select="1" /> | ||
| 26 | <xsl:param name="section.label.includes.component.label" select="1" /> | ||
| 27 | </xsl:stylesheet> | ||
diff --git a/documentation/profile-manual/profile-manual-examples.xml b/documentation/profile-manual/profile-manual-examples.xml new file mode 100644 index 0000000000..9630c6c307 --- /dev/null +++ b/documentation/profile-manual/profile-manual-examples.xml | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
| 2 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" | ||
| 3 | [<!ENTITY % poky SYSTEM "../poky.ent"> %poky; ] > | ||
| 4 | |||
| 5 | <chapter id='profile-manual-examples'> | ||
| 6 | |||
| 7 | <title>Real-World Examples</title> | ||
| 8 | |||
| 9 | <para> | ||
| 10 | This chapter contains real-world examples. | ||
| 11 | </para> | ||
| 12 | |||
| 13 | <section id='slow-write-speed-on-live-images'> | ||
| 14 | <title>Slow Write Speed on Live Images</title> | ||
| 15 | |||
| 16 | <para> | ||
| 17 | In one of our previous releases (denzil), users noticed that booting | ||
| 18 | off of a live image and writing to disk was noticeably slower. | ||
| 19 | This included the boot itself, especially the first one, since first | ||
| 20 | boots tend to do a significant amount of writing due to certain | ||
| 21 | post-install scripts. | ||
| 22 | </para> | ||
| 23 | |||
| 24 | <para> | ||
| 25 | The problem (and solution) was discovered by using the Yocto tracing | ||
| 26 | tools, in this case 'perf stat', 'perf script', 'perf record' | ||
| 27 | and 'perf report'. | ||
| 28 | </para> | ||
| 29 | |||
| 30 | <para> | ||
| 31 | See all the unvarnished details of how this bug was diagnosed and | ||
| 32 | solved here: Yocto Bug #3049 | ||
| 33 | </para> | ||
| 34 | </section> | ||
| 35 | |||
| 36 | </chapter> | ||
| 37 | <!-- | ||
| 38 | vim: expandtab tw=80 ts=4 | ||
| 39 | --> | ||
diff --git a/documentation/profile-manual/profile-manual-intro.xml b/documentation/profile-manual/profile-manual-intro.xml new file mode 100644 index 0000000000..96f819c4d9 --- /dev/null +++ b/documentation/profile-manual/profile-manual-intro.xml | |||
| @@ -0,0 +1,102 @@ | |||
| 1 | <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
| 2 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" | ||
| 3 | [<!ENTITY % poky SYSTEM "../poky.ent"> %poky; ] > | ||
| 4 | |||
| 5 | <chapter id='profile-manual-intro'> | ||
| 6 | |||
| 7 | <title>Yocto Project Tracing and Profiling Manual</title> | ||
| 8 | <section id='intro'> | ||
| 9 | <title>Introduction</title> | ||
| 10 | |||
| 11 | <para> | ||
| 12 | Yocto bundles a number of tracing and profiling tools - this 'HOWTO' | ||
| 13 | describes their basic usage and shows by example how to make use | ||
| 14 | of them to examine application and system behavior. | ||
| 15 | </para> | ||
| 16 | |||
| 17 | <para> | ||
| 18 | The tools presented are for the most part completely open-ended and | ||
| 19 | have quite good and/or extensive documentation of their own which | ||
| 20 | can be used to solve just about any problem you might come across | ||
| 21 | in Linux. | ||
| 22 | Each section that describes a particular tool has links to that | ||
| 23 | tool's documentation and website. | ||
| 24 | </para> | ||
| 25 | |||
| 26 | <para> | ||
| 27 | The purpose of this 'HOWTO' is to present a set of common and | ||
| 28 | generally useful tracing and profiling idioms along with their | ||
| 29 | application (as appropriate) to each tool, in the context of a | ||
| 30 | general-purpose 'drill-down' methodology that can be applied | ||
| 31 | to solving a large number (90%?) of problems. | ||
| 32 | For help with more advanced usages and problems, please see | ||
| 33 | the documentation and/or websites listed for each tool. | ||
| 34 | </para> | ||
| 35 | |||
| 36 | <para> | ||
| 37 | The final section of this 'HOWTO' is a collection of real-world | ||
| 38 | examples which we'll be continually adding to as we solve more | ||
| 39 | problems using the tools - feel free to add your own examples | ||
| 40 | to the list! | ||
| 41 | </para> | ||
| 42 | </section> | ||
| 43 | |||
| 44 | <section id='profile-manual-general-setup'> | ||
| 45 | <title>General Setup</title> | ||
| 46 | |||
| 47 | <para> | ||
| 48 | Most of the tools are available only in 'sdk' images or in images | ||
| 49 | built after adding 'tools-profile' to your local.conf. | ||
| 50 | So, in order to be able to access all of the tools described here, | ||
| 51 | please first build and boot an 'sdk' image e.g. | ||
| 52 | <literallayout class='monospaced'> | ||
| 53 | $ bitbake core-image-sato-sdk | ||
| 54 | </literallayout> | ||
| 55 | or, alternatively, build an image after adding 'tools-profile' to the | |
| 56 | EXTRA_IMAGE_FEATURES line in your local.conf: | ||
| 57 | <literallayout class='monospaced'> | ||
| 58 | EXTRA_IMAGE_FEATURES = "debug-tweaks tools-profile" | ||
| 59 | </literallayout> | ||
| 60 | If you use the 'tools-profile' method, you don't need to build an | ||
| 61 | sdk image - the tracing and profiling tools will be included in | ||
| 62 | non-sdk images as well e.g.: | ||
| 63 | <literallayout class='monospaced'> | ||
| 64 | $ bitbake core-image-sato | ||
| 65 | </literallayout> | ||
| 66 | <note><para> | ||
| 67 | By default, the Yocto build system strips symbols from the | ||
| 68 | binaries it packages, which makes it difficult to use some | ||
| 69 | of the tools. | ||
| 70 | </para><para>You can prevent that by putting the following | ||
| 71 | in your local.conf when you build the image: | ||
| 72 | </para> | ||
| 73 | </note> | ||
| 74 | <literallayout class='monospaced'> | ||
| 75 | INHIBIT_PACKAGE_STRIP = "1" | ||
| 76 | </literallayout> | ||
| 77 | The above setting will noticeably increase the size of your image. | ||
| 78 | </para> | ||
| 79 | |||
| 80 | <para> | ||
| 81 | If you've already built a stripped image, you can generate | ||
| 82 | debug packages (xxx-dbg) which you can manually install as | ||
| 83 | needed. | ||
| 84 | </para> | ||
| 85 | |||
| 86 | <para> | ||
| 87 | To generate debug info for packages, you can add dbg-pkgs to | ||
| 88 | EXTRA_IMAGE_FEATURES in local.conf. For example: | ||
| 89 | <literallayout class='monospaced'> | ||
| 90 | EXTRA_IMAGE_FEATURES = "debug-tweaks tools-profile dbg-pkgs" | ||
| 91 | </literallayout> | ||
| 92 | Additionally, in order to generate the right type of | ||
| 93 | debuginfo, we also need to add the following to local.conf: | ||
| 94 | <literallayout class='monospaced'> | ||
| 95 | PACKAGE_DEBUG_SPLIT_STYLE = 'debug-file-directory' | ||
| 96 | </literallayout> | ||
| 97 | </para> | ||
| 98 | </section> | ||
| 99 | </chapter> | ||
| 100 | <!-- | ||
| 101 | vim: expandtab tw=80 ts=4 | ||
| 102 | --> | ||
diff --git a/documentation/profile-manual/profile-manual-style.css b/documentation/profile-manual/profile-manual-style.css new file mode 100644 index 0000000000..7b1b342087 --- /dev/null +++ b/documentation/profile-manual/profile-manual-style.css | |||
| @@ -0,0 +1,978 @@ | |||
| 1 | /* | ||
| 2 | Generic XHTML / DocBook XHTML CSS Stylesheet. | ||
| 3 | |||
| 4 | Browser wrangling and typographic design by | ||
| 5 | Oyvind Kolas / pippin@gimp.org | ||
| 6 | |||
| 7 | Customised for Poky by | ||
| 8 | Matthew Allum / mallum@o-hand.com | ||
| 9 | |||
| 10 | Thanks to: | ||
| 11 | Liam R. E. Quin | ||
| 12 | William Skaggs | ||
| 13 | Jakub Steiner | ||
| 14 | |||
| 15 | Structure | ||
| 16 | --------- | ||
| 17 | |||
| 18 | The stylesheet is divided into the following sections: | ||
| 19 | |||
| 20 | Positioning | ||
| 21 | Margins, paddings, width, font-size, clearing. | ||
| 22 | Decorations | ||
| 23 | Borders, style | ||
| 24 | Colors | ||
| 25 | Colors | ||
| 26 | Graphics | ||
| 27 | Graphical backgrounds | ||
| 28 | Nasty IE tweaks | ||
| 29 | Workarounds needed to make it work in Internet Explorer; | |
| 30 | these currently make the stylesheet non-validating, but up until | |
| 31 | this point it is validating. | |
| 32 | Mozilla extensions | ||
| 33 | Transparency for footer | ||
| 34 | Rounded corners on boxes | ||
| 35 | |||
| 36 | */ | ||
| 37 | |||
| 38 | |||
| 39 | /*************** / | ||
| 40 | / Positioning / | ||
| 41 | / ***************/ | ||
| 42 | |||
| 43 | body { | ||
| 44 | font-family: Verdana, Sans, sans-serif; | ||
| 45 | |||
| 46 | min-width: 640px; | ||
| 47 | width: 80%; | ||
| 48 | margin: 0em auto; | ||
| 49 | padding: 2em 5em 5em 5em; | ||
| 50 | color: #333; | ||
| 51 | } | ||
| 52 | |||
| 53 | h1,h2,h3,h4,h5,h6,h7 { | ||
| 54 | font-family: Arial, Sans; | ||
| 55 | color: #00557D; | ||
| 56 | clear: both; | ||
| 57 | } | ||
| 58 | |||
| 59 | h1 { | ||
| 60 | font-size: 2em; | ||
| 61 | text-align: left; | ||
| 62 | padding: 0em 0em 0em 0em; | ||
| 63 | margin: 2em 0em 0em 0em; | ||
| 64 | } | ||
| 65 | |||
| 66 | h2.subtitle { | ||
| 67 | margin: 0.10em 0em 3.0em 0em; | ||
| 68 | padding: 0em 0em 0em 0em; | ||
| 69 | font-size: 1.8em; | ||
| 70 | padding-left: 20%; | ||
| 71 | font-weight: normal; | ||
| 72 | font-style: italic; | ||
| 73 | } | ||
| 74 | |||
| 75 | h2 { | ||
| 76 | margin: 2em 0em 0.66em 0em; | ||
| 77 | padding: 0.5em 0em 0em 0em; | ||
| 78 | font-size: 1.5em; | ||
| 79 | font-weight: bold; | ||
| 80 | } | ||
| 81 | |||
| 82 | h3.subtitle { | ||
| 83 | margin: 0em 0em 1em 0em; | ||
| 84 | padding: 0em 0em 0em 0em; | ||
| 85 | font-size: 142.14%; | ||
| 86 | text-align: right; | ||
| 87 | } | ||
| 88 | |||
| 89 | h3 { | ||
| 90 | margin: 1em 0em 0.5em 0em; | ||
| 91 | padding: 1em 0em 0em 0em; | ||
| 92 | font-size: 140%; | ||
| 93 | font-weight: bold; | ||
| 94 | } | ||
| 95 | |||
| 96 | h4 { | ||
| 97 | margin: 1em 0em 0.5em 0em; | ||
| 98 | padding: 1em 0em 0em 0em; | ||
| 99 | font-size: 120%; | ||
| 100 | font-weight: bold; | ||
| 101 | } | ||
| 102 | |||
| 103 | h5 { | ||
| 104 | margin: 1em 0em 0.5em 0em; | ||
| 105 | padding: 1em 0em 0em 0em; | ||
| 106 | font-size: 110%; | ||
| 107 | font-weight: bold; | ||
| 108 | } | ||
| 109 | |||
| 110 | h6 { | ||
| 111 | margin: 1em 0em 0em 0em; | ||
| 112 | padding: 1em 0em 0em 0em; | ||
| 113 | font-size: 110%; | ||
| 114 | font-weight: bold; | ||
| 115 | } | ||
| 116 | |||
| 117 | .authorgroup { | ||
| 118 | background-color: transparent; | ||
| 119 | background-repeat: no-repeat; | ||
| 120 | padding-top: 256px; | ||
| 121 | background-image: url("figures/profile-title.png"); | ||
| 122 | background-position: left top; | ||
| 123 | margin-top: -256px; | ||
| 124 | padding-right: 50px; | ||
| 125 | margin-left: 0px; | ||
| 126 | text-align: right; | ||
| 127 | width: 740px; | ||
| 128 | } | ||
| 129 | |||
| 130 | h3.author { | ||
| 131 | margin: 0em 0em 0em 0em; | |
| 132 | padding: 0em 0em 0em 0em; | ||
| 133 | font-weight: normal; | ||
| 134 | font-size: 100%; | ||
| 135 | color: #333; | ||
| 136 | clear: both; | ||
| 137 | } | ||
| 138 | |||
| 139 | .author tt.email { | ||
| 140 | font-size: 66%; | ||
| 141 | } | ||
| 142 | |||
| 143 | .titlepage hr { | ||
| 144 | width: 0em; | ||
| 145 | clear: both; | ||
| 146 | } | ||
| 147 | |||
| 148 | .revhistory { | ||
| 149 | padding-top: 2em; | ||
| 150 | clear: both; | ||
| 151 | } | ||
| 152 | |||
| 153 | .toc, | ||
| 154 | .list-of-tables, | ||
| 155 | .list-of-examples, | ||
| 156 | .list-of-figures { | ||
| 157 | padding: 1.33em 0em 2.5em 0em; | ||
| 158 | color: #00557D; | ||
| 159 | } | ||
| 160 | |||
| 161 | .toc p, | ||
| 162 | .list-of-tables p, | ||
| 163 | .list-of-figures p, | ||
| 164 | .list-of-examples p { | ||
| 165 | padding: 0em 0em 0em 0em; | ||
| 166 | padding: 0em 0em 0.3em; | ||
| 167 | margin: 1.5em 0em 0em 0em; | ||
| 168 | } | ||
| 169 | |||
| 170 | .toc p b, | ||
| 171 | .list-of-tables p b, | ||
| 172 | .list-of-figures p b, | ||
| 173 | .list-of-examples p b{ | ||
| 174 | font-size: 100.0%; | ||
| 175 | font-weight: bold; | ||
| 176 | } | ||
| 177 | |||
| 178 | .toc dl, | ||
| 179 | .list-of-tables dl, | ||
| 180 | .list-of-figures dl, | ||
| 181 | .list-of-examples dl { | ||
| 182 | margin: 0em 0em 0.5em 0em; | ||
| 183 | padding: 0em 0em 0em 0em; | ||
| 184 | } | ||
| 185 | |||
| 186 | .toc dt { | ||
| 187 | margin: 0em 0em 0em 0em; | ||
| 188 | padding: 0em 0em 0em 0em; | ||
| 189 | } | ||
| 190 | |||
| 191 | .toc dd { | ||
| 192 | margin: 0em 0em 0em 2.6em; | ||
| 193 | padding: 0em 0em 0em 0em; | ||
| 194 | } | ||
| 195 | |||
| 196 | div.glossary dl, | ||
| 197 | div.variablelist dl { | ||
| 198 | } | ||
| 199 | |||
| 200 | .glossary dl dt, | ||
| 201 | .variablelist dl dt, | ||
| 202 | .variablelist dl dt span.term { | ||
| 203 | font-weight: normal; | ||
| 204 | width: 20em; | ||
| 205 | text-align: right; | ||
| 206 | } | ||
| 207 | |||
| 208 | .variablelist dl dt { | ||
| 209 | margin-top: 0.5em; | ||
| 210 | } | ||
| 211 | |||
| 212 | .glossary dl dd, | ||
| 213 | .variablelist dl dd { | ||
| 214 | margin-top: -1em; | ||
| 215 | margin-left: 25.5em; | ||
| 216 | } | ||
| 217 | |||
| 218 | .glossary dd p, | ||
| 219 | .variablelist dd p { | ||
| 220 | margin-top: 0em; | ||
| 221 | margin-bottom: 1em; | ||
| 222 | } | ||
| 223 | |||
| 224 | |||
| 225 | div.calloutlist table td { | ||
| 226 | padding: 0em 0em 0em 0em; | ||
| 227 | margin: 0em 0em 0em 0em; | ||
| 228 | } | ||
| 229 | |||
| 230 | div.calloutlist table td p { | ||
| 231 | margin-top: 0em; | ||
| 232 | margin-bottom: 1em; | ||
| 233 | } | ||
| 234 | |||
| 235 | div p.copyright { | ||
| 236 | text-align: left; | ||
| 237 | } | ||
| 238 | |||
| 239 | div.legalnotice p.legalnotice-title { | ||
| 240 | margin-bottom: 0em; | ||
| 241 | } | ||
| 242 | |||
| 243 | p { | ||
| 244 | line-height: 1.5em; | ||
| 245 | margin-top: 0em; | ||
| 246 | |||
| 247 | } | ||
| 248 | |||
| 249 | dl { | ||
| 250 | padding-top: 0em; | ||
| 251 | } | ||
| 252 | |||
| 253 | hr { | ||
| 254 | border: solid 1px; | ||
| 255 | } | ||
| 256 | |||
| 257 | |||
| 258 | .mediaobject, | ||
| 259 | .mediaobjectco { | ||
| 260 | text-align: center; | ||
| 261 | } | ||
| 262 | |||
| 263 | img { | ||
| 264 | border: none; | ||
| 265 | } | ||
| 266 | |||
| 267 | ul { | ||
| 268 | padding: 0em 0em 0em 1.5em; | ||
| 269 | } | ||
| 270 | |||
| 271 | ul li { | ||
| 272 | padding: 0em 0em 0em 0em; | ||
| 273 | } | ||
| 274 | |||
| 275 | ul li p { | ||
| 276 | text-align: left; | ||
| 277 | } | ||
| 278 | |||
| 279 | table { | ||
| 280 | width :100%; | ||
| 281 | } | ||
| 282 | |||
| 283 | th { | ||
| 284 | padding: 0.25em; | ||
| 285 | text-align: left; | ||
| 286 | font-weight: normal; | ||
| 287 | vertical-align: top; | ||
| 288 | } | ||
| 289 | |||
| 290 | td { | ||
| 291 | padding: 0.25em; | ||
| 292 | vertical-align: top; | ||
| 293 | } | ||
| 294 | |||
| 295 | p a[id] { | ||
| 296 | margin: 0px; | ||
| 297 | padding: 0px; | ||
| 298 | display: inline; | ||
| 299 | background-image: none; | ||
| 300 | } | ||
| 301 | |||
| 302 | a { | ||
| 303 | text-decoration: underline; | ||
| 304 | color: #444; | ||
| 305 | } | ||
| 306 | |||
| 307 | pre { | ||
| 308 | overflow: auto; | ||
| 309 | } | ||
| 310 | |||
| 311 | a:hover { | ||
| 312 | text-decoration: underline; | ||
| 313 | /*font-weight: bold;*/ | ||
| 314 | } | ||
| 315 | |||
| 316 | |||
| 317 | div.informalfigure, | ||
| 318 | div.informalexample, | ||
| 319 | div.informaltable, | ||
| 320 | div.figure, | ||
| 321 | div.table, | ||
| 322 | div.example { | ||
| 323 | margin: 1em 0em; | ||
| 324 | padding: 1em; | ||
| 325 | page-break-inside: avoid; | ||
| 326 | } | ||
| 327 | |||
| 328 | |||
| 329 | div.informalfigure p.title b, | ||
| 330 | div.informalexample p.title b, | ||
| 331 | div.informaltable p.title b, | ||
| 332 | div.figure p.title b, | ||
| 333 | div.example p.title b, | ||
| 334 | div.table p.title b{ | ||
| 335 | padding-top: 0em; | ||
| 336 | margin-top: 0em; | ||
| 337 | font-size: 100%; | ||
| 338 | font-weight: normal; | ||
| 339 | } | ||
| 340 | |||
| 341 | .mediaobject .caption, | ||
| 342 | .mediaobject .caption p { | ||
| 343 | text-align: center; | ||
| 344 | font-size: 80%; | ||
| 345 | padding-top: 0.5em; | ||
| 346 | padding-bottom: 0.5em; | ||
| 347 | } | ||
| 348 | |||
| 349 | .epigraph { | ||
| 350 | padding-left: 55%; | ||
| 351 | margin-bottom: 1em; | ||
| 352 | } | ||
| 353 | |||
| 354 | .epigraph p { | ||
| 355 | text-align: left; | ||
| 356 | } | ||
| 357 | |||
| 358 | .epigraph .quote { | ||
| 359 | font-style: italic; | ||
| 360 | } | ||
| 361 | .epigraph .attribution { | ||
| 362 | font-style: normal; | ||
| 363 | text-align: right; | ||
| 364 | } | ||
| 365 | |||
| 366 | span.application { | ||
| 367 | font-style: italic; | ||
| 368 | } | ||
| 369 | |||
| 370 | .programlisting { | ||
| 371 | font-family: monospace; | ||
| 372 | font-size: 80%; | ||
| 373 | white-space: pre; | ||
| 374 | margin: 1.33em 0em; | ||
| 375 | padding: 1.33em; | ||
| 376 | } | ||
| 377 | |||
| 378 | .tip, | ||
| 379 | .warning, | ||
| 380 | .caution, | ||
| 381 | .note { | ||
| 382 | margin-top: 1em; | ||
| 383 | margin-bottom: 1em; | ||
| 384 | |||
| 385 | } | ||
| 386 | |||
| 387 | /* force full width of table within div */ | ||
| 388 | .tip table, | ||
| 389 | .warning table, | ||
| 390 | .caution table, | ||
| 391 | .note table { | ||
| 392 | border: none; | ||
| 393 | width: 100%; | ||
| 394 | } | ||
| 395 | |||
| 396 | |||
| 397 | .tip table th, | ||
| 398 | .warning table th, | ||
| 399 | .caution table th, | ||
| 400 | .note table th { | ||
| 401 | padding: 0.8em 0.0em 0.0em 0.0em; | ||
| 402 | margin : 0em 0em 0em 0em; | ||
| 403 | } | ||
| 404 | |||
| 405 | .tip p, | ||
| 406 | .warning p, | ||
| 407 | .caution p, | ||
| 408 | .note p { | ||
| 409 | margin-top: 0.5em; | ||
| 410 | margin-bottom: 0.5em; | ||
| 411 | padding-right: 1em; | ||
| 412 | text-align: left; | ||
| 413 | } | ||
| 414 | |||
| 415 | .acronym { | ||
| 416 | text-transform: uppercase; | ||
| 417 | } | ||
| 418 | |||
| 419 | b.keycap, | ||
| 420 | .keycap { | ||
| 421 | padding: 0.09em 0.3em; | ||
| 422 | margin: 0em; | ||
| 423 | } | ||
| 424 | |||
| 425 | .itemizedlist li { | ||
| 426 | clear: none; | ||
| 427 | } | ||
| 428 | |||
| 429 | .filename { | ||
| 430 | font-size: medium; | ||
| 431 | font-family: Courier, monospace; | ||
| 432 | } | ||
| 433 | |||
| 434 | |||
| 435 | div.navheader, div.heading{ | ||
| 436 | position: absolute; | ||
| 437 | left: 0em; | ||
| 438 | top: 0em; | ||
| 439 | width: 100%; | ||
| 440 | background-color: #cdf; | ||
| 441 | width: 100%; | ||
| 442 | } | ||
| 443 | |||
| 444 | div.navfooter, div.footing{ | ||
| 445 | position: fixed; | ||
| 446 | left: 0em; | ||
| 447 | bottom: 0em; | ||
| 448 | background-color: #eee; | ||
| 449 | width: 100%; | ||
| 450 | } | ||
| 451 | |||
| 452 | |||
| 453 | div.navheader td, | ||
| 454 | div.navfooter td { | ||
| 455 | font-size: 66%; | ||
| 456 | } | ||
| 457 | |||
| 458 | div.navheader table th { | ||
| 459 | /*font-family: Georgia, Times, serif;*/ | ||
| 460 | /*font-size: x-large;*/ | ||
| 461 | font-size: 80%; | ||
| 462 | } | ||
| 463 | |||
| 464 | div.navheader table { | ||
| 465 | border-left: 0em; | ||
| 466 | border-right: 0em; | ||
| 467 | border-top: 0em; | ||
| 468 | width: 100%; | ||
| 469 | } | ||
| 470 | |||
| 471 | div.navfooter table { | ||
| 472 | border-left: 0em; | ||
| 473 | border-right: 0em; | ||
| 474 | border-bottom: 0em; | ||
| 475 | width: 100%; | ||
| 476 | } | ||
| 477 | |||
| 478 | div.navheader table td a, | ||
| 479 | div.navfooter table td a { | ||
| 480 | color: #777; | ||
| 481 | text-decoration: none; | ||
| 482 | } | ||
| 483 | |||
| 484 | /* normal text in the footer */ | ||
| 485 | div.navfooter table td { | ||
| 486 | color: black; | ||
| 487 | } | ||
| 488 | |||
| 489 | div.navheader table td a:visited, | ||
| 490 | div.navfooter table td a:visited { | ||
| 491 | color: #444; | ||
| 492 | } | ||
| 493 | |||
| 494 | |||
| 495 | /* links in header and footer */ | ||
| 496 | div.navheader table td a:hover, | ||
| 497 | div.navfooter table td a:hover { | ||
| 498 | text-decoration: underline; | ||
| 499 | background-color: transparent; | ||
| 500 | color: #33a; | ||
| 501 | } | ||
| 502 | |||
| 503 | div.navheader hr, | ||
| 504 | div.navfooter hr { | ||
| 505 | display: none; | ||
| 506 | } | ||
| 507 | |||
| 508 | |||
| 509 | .qandaset tr.question td p { | ||
| 510 | margin: 0em 0em 1em 0em; | ||
| 511 | padding: 0em 0em 0em 0em; | ||
| 512 | } | ||
| 513 | |||
| 514 | .qandaset tr.answer td p { | ||
| 515 | margin: 0em 0em 1em 0em; | ||
| 516 | padding: 0em 0em 0em 0em; | ||
| 517 | } | ||
| 518 | .answer td { | ||
| 519 | padding-bottom: 1.5em; | ||
| 520 | } | ||
| 521 | |||
| 522 | .emphasis { | ||
| 523 | font-weight: bold; | ||
| 524 | } | ||
| 525 | |||
| 526 | |||
| 527 | /************* / | ||
| 528 | / decorations / | ||
| 529 | / *************/ | ||
| 530 | |||
| 531 | .titlepage { | ||
| 532 | } | ||
| 533 | |||
| 534 | .part .title { | ||
| 535 | } | ||
| 536 | |||
| 537 | .subtitle { | ||
| 538 | border: none; | ||
| 539 | } | ||
| 540 | |||
| 541 | /* | ||
| 542 | h1 { | ||
| 543 | border: none; | ||
| 544 | } | ||
| 545 | |||
| 546 | h2 { | ||
| 547 | border-top: solid 0.2em; | ||
| 548 | border-bottom: solid 0.06em; | ||
| 549 | } | ||
| 550 | |||
| 551 | h3 { | ||
| 552 | border-top: 0em; | ||
| 553 | border-bottom: solid 0.06em; | ||
| 554 | } | ||
| 555 | |||
| 556 | h4 { | ||
| 557 | border: 0em; | ||
| 558 | border-bottom: solid 0.06em; | ||
| 559 | } | ||
| 560 | |||
| 561 | h5 { | ||
| 562 | border: 0em; | ||
| 563 | } | ||
| 564 | */ | ||
| 565 | |||
| 566 | .programlisting { | ||
| 567 | border: solid 1px; | ||
| 568 | } | ||
| 569 | |||
| 570 | div.figure, | ||
| 571 | div.table, | ||
| 572 | div.informalfigure, | ||
| 573 | div.informaltable, | ||
| 574 | div.informalexample, | ||
| 575 | div.example { | ||
| 576 | border: 1px solid; | ||
| 577 | } | ||
| 578 | |||
| 579 | |||
| 580 | |||
| 581 | .tip, | ||
| 582 | .warning, | ||
| 583 | .caution, | ||
| 584 | .note { | ||
| 585 | border: 1px solid; | ||
| 586 | } | ||
| 587 | |||
| 588 | .tip table th, | ||
| 589 | .warning table th, | ||
| 590 | .caution table th, | ||
| 591 | .note table th { | ||
| 592 | border-bottom: 1px solid; | ||
| 593 | } | ||
| 594 | |||
| 595 | .question td { | ||
| 596 | border-top: 1px solid black; | ||
| 597 | } | ||
| 598 | |||
| 599 | .answer { | ||
| 600 | } | ||
| 601 | |||
| 602 | |||
| 603 | b.keycap, | ||
| 604 | .keycap { | ||
| 605 | border: 1px solid; | ||
| 606 | } | ||
| 607 | |||
| 608 | |||
| 609 | div.navheader, div.heading{ | ||
| 610 | border-bottom: 1px solid; | ||
| 611 | } | ||
| 612 | |||
| 613 | |||
| 614 | div.navfooter, div.footing{ | ||
| 615 | border-top: 1px solid; | ||
| 616 | } | ||
| 617 | |||
| 618 | /********* / | ||
| 619 | / colors / | ||
| 620 | / *********/ | ||
| 621 | |||
| 622 | body { | ||
| 623 | color: #333; | ||
| 624 | background: white; | ||
| 625 | } | ||
| 626 | |||
| 627 | a { | ||
| 628 | background: transparent; | ||
| 629 | } | ||
| 630 | |||
| 631 | a:hover { | ||
| 632 | background-color: #dedede; | ||
| 633 | } | ||
| 634 | |||
| 635 | |||
| 636 | h1, | ||
| 637 | h2, | ||
| 638 | h3, | ||
| 639 | h4, | ||
| 640 | h5, | ||
| 641 | h6, | ||
| 642 | h7, | ||
| 643 | h8 { | ||
| 644 | background-color: transparent; | ||
| 645 | } | ||
| 646 | |||
| 647 | hr { | ||
| 648 | border-color: #aaa; | ||
| 649 | } | ||
| 650 | |||
| 651 | |||
| 652 | .tip, .warning, .caution, .note { | ||
| 653 | border-color: #fff; | ||
| 654 | } | ||
| 655 | |||
| 656 | |||
| 657 | .tip table th, | ||
| 658 | .warning table th, | ||
| 659 | .caution table th, | ||
| 660 | .note table th { | ||
| 661 | border-bottom-color: #fff; | ||
| 662 | } | ||
| 663 | |||
| 664 | |||
| 665 | .warning { | ||
| 666 | background-color: #f0f0f2; | ||
| 667 | } | ||
| 668 | |||
| 669 | .caution { | ||
| 670 | background-color: #f0f0f2; | ||
| 671 | } | ||
| 672 | |||
| 673 | .tip { | ||
| 674 | background-color: #f0f0f2; | ||
| 675 | } | ||
| 676 | |||
| 677 | .note { | ||
| 678 | background-color: #f0f0f2; | ||
| 679 | } | ||
| 680 | |||
| 681 | .glossary dl dt, | ||
| 682 | .variablelist dl dt, | ||
| 683 | .variablelist dl dt span.term { | ||
| 684 | color: #044; | ||
| 685 | } | ||
| 686 | |||
| 687 | div.figure, | ||
| 688 | div.table, | ||
| 689 | div.example, | ||
| 690 | div.informalfigure, | ||
| 691 | div.informaltable, | ||
| 692 | div.informalexample { | ||
| 693 | border-color: #aaa; | ||
| 694 | } | ||
| 695 | |||
| 696 | pre.programlisting { | ||
| 697 | color: black; | ||
| 698 | background-color: #fff; | ||
| 699 | border-color: #aaa; | ||
| 700 | border-width: 2px; | ||
| 701 | } | ||
| 702 | |||
| 703 | .guimenu, | ||
| 704 | .guilabel, | ||
| 705 | .guimenuitem { | ||
| 706 | background-color: #eee; | ||
| 707 | } | ||
| 708 | |||
| 709 | |||
| 710 | b.keycap, | ||
| 711 | .keycap { | ||
| 712 | background-color: #eee; | ||
| 713 | border-color: #999; | ||
| 714 | } | ||
| 715 | |||
| 716 | |||
| 717 | div.navheader { | ||
| 718 | border-color: black; | ||
| 719 | } | ||
| 720 | |||
| 721 | |||
| 722 | div.navfooter { | ||
| 723 | border-color: black; | ||
| 724 | } | ||
| 725 | |||
| 726 | |||
| 727 | /*********** / | ||
| 728 | / graphics / | ||
| 729 | / ***********/ | ||
| 730 | |||
| 731 | /* | ||
| 732 | body { | ||
| 733 | background-image: url("images/body_bg.jpg"); | ||
| 734 | background-attachment: fixed; | ||
| 735 | } | ||
| 736 | |||
| 737 | .navheader, | ||
| 738 | .note, | ||
| 739 | .tip { | ||
| 740 | background-image: url("images/note_bg.jpg"); | ||
| 741 | background-attachment: fixed; | ||
| 742 | } | ||
| 743 | |||
| 744 | .warning, | ||
| 745 | .caution { | ||
| 746 | background-image: url("images/warning_bg.jpg"); | ||
| 747 | background-attachment: fixed; | ||
| 748 | } | ||
| 749 | |||
| 750 | .figure, | ||
| 751 | .informalfigure, | ||
| 752 | .example, | ||
| 753 | .informalexample, | ||
| 754 | .table, | ||
| 755 | .informaltable { | ||
| 756 | background-image: url("images/figure_bg.jpg"); | ||
| 757 | background-attachment: fixed; | ||
| 758 | } | ||
| 759 | |||
| 760 | */ | ||
| 761 | h1, | ||
| 762 | h2, | ||
| 763 | h3, | ||
| 764 | h4, | ||
| 765 | h5, | ||
| 766 | h6, | ||
| 767 | h7{ | ||
| 768 | } | ||
| 769 | |||
| 770 | /* | ||
| 771 | Example of how to stick an image as part of the title. | ||
| 772 | |||
| 773 | div.article .titlepage .title | ||
| 774 | { | ||
| 775 | background-image: url("figures/white-on-black.png"); | ||
| 776 | background-position: center; | ||
| 777 | background-repeat: repeat-x; | ||
| 778 | } | ||
| 779 | */ | ||
| 780 | |||
| 781 | div.preface .titlepage .title, | ||
| 782 | div.colophon .title, | ||
| 783 | div.chapter .titlepage .title, | ||
| 784 | div.article .titlepage .title | ||
| 785 | { | ||
| 786 | } | ||
| 787 | |||
| 788 | div.section div.section .titlepage .title, | ||
| 789 | div.sect2 .titlepage .title { | ||
| 790 | background: none; | ||
| 791 | } | ||
| 792 | |||
| 793 | |||
| 794 | h1.title { | ||
| 795 | background-color: transparent; | ||
| 796 | background-image: url("figures/yocto-project-bw.png"); | ||
| 797 | background-repeat: no-repeat; | ||
| 798 | height: 256px; | ||
| 799 | text-indent: -9000px; | ||
| 800 | overflow:hidden; | ||
| 801 | } | ||
| 802 | |||
| 803 | h2.subtitle { | ||
| 804 | background-color: transparent; | ||
| 805 | text-indent: -9000px; | ||
| 806 | overflow:hidden; | ||
| 807 | width: 0px; | ||
| 808 | display: none; | ||
| 809 | } | ||
| 810 | |||
| 811 | /*************************************** / | ||
| 812 | / pippin.gimp.org specific alterations / | ||
| 813 | / ***************************************/ | ||
| 814 | |||
| 815 | /* | ||
| 816 | div.heading, div.navheader { | ||
| 817 | color: #777; | ||
| 818 | font-size: 80%; | ||
| 819 | padding: 0; | ||
| 820 | margin: 0; | ||
| 821 | text-align: left; | ||
| 822 | position: absolute; | ||
| 823 | top: 0px; | ||
| 824 | left: 0px; | ||
| 825 | width: 100%; | ||
| 826 | height: 50px; | ||
| 827 | background: url('/gfx/heading_bg.png') transparent; | ||
| 828 | background-repeat: repeat-x; | ||
| 829 | background-attachment: fixed; | ||
| 830 | border: none; | ||
| 831 | } | ||
| 832 | |||
| 833 | div.heading a { | ||
| 834 | color: #444; | ||
| 835 | } | ||
| 836 | |||
| 837 | div.footing, div.navfooter { | ||
| 838 | border: none; | ||
| 839 | color: #ddd; | ||
| 840 | font-size: 80%; | ||
| 841 | text-align:right; | ||
| 842 | |||
| 843 | width: 100%; | ||
| 844 | padding-top: 10px; | ||
| 845 | position: absolute; | ||
| 846 | bottom: 0px; | ||
| 847 | left: 0px; | ||
| 848 | |||
| 849 | background: url('/gfx/footing_bg.png') transparent; | ||
| 850 | } | ||
| 851 | */ | ||
| 852 | |||
| 853 | |||
| 854 | |||
| 855 | /****************** / | ||
| 856 | / nasty ie tweaks / | ||
| 857 | / ******************/ | ||
| 858 | |||
| 859 | /* | ||
| 860 | div.heading, div.navheader { | ||
| 861 | width:expression(document.body.clientWidth + "px"); | ||
| 862 | } | ||
| 863 | |||
| 864 | div.footing, div.navfooter { | ||
| 865 | width:expression(document.body.clientWidth + "px"); | ||
| 866 | margin-left:expression("-5em"); | ||
| 867 | } | ||
| 868 | body { | ||
| 869 | padding:expression("4em 5em 0em 5em"); | ||
| 870 | } | ||
| 871 | */ | ||
| 872 | |||
| 873 | /**************************************** / | ||
| 874 | / mozilla vendor specific css extensions / | ||
| 875 | / ****************************************/ | ||
| 876 | /* | ||
| 877 | div.navfooter, div.footing{ | ||
| 878 | -moz-opacity: 0.8em; | ||
| 879 | } | ||
| 880 | |||
| 881 | div.figure, | ||
| 882 | div.table, | ||
| 883 | div.informalfigure, | ||
| 884 | div.informaltable, | ||
| 885 | div.informalexample, | ||
| 886 | div.example, | ||
| 887 | .tip, | ||
| 888 | .warning, | ||
| 889 | .caution, | ||
| 890 | .note { | ||
| 891 | -moz-border-radius: 0.5em; | ||
| 892 | } | ||
| 893 | |||
| 894 | b.keycap, | ||
| 895 | .keycap { | ||
| 896 | -moz-border-radius: 0.3em; | ||
| 897 | } | ||
| 898 | */ | ||
| 899 | |||
| 900 | table tr td table tr td { | ||
| 901 | display: none; | ||
| 902 | } | ||
| 903 | |||
| 904 | |||
| 905 | hr { | ||
| 906 | display: none; | ||
| 907 | } | ||
| 908 | |||
| 909 | table { | ||
| 910 | border: 0em; | ||
| 911 | } | ||
| 912 | |||
| 913 | .photo { | ||
| 914 | float: right; | ||
| 915 | margin-left: 1.5em; | ||
| 916 | margin-bottom: 1.5em; | ||
| 917 | margin-top: 0em; | ||
| 918 | max-width: 17em; | ||
| 919 | border: 1px solid gray; | ||
| 920 | padding: 3px; | ||
| 921 | background: white; | ||
| 922 | } | ||
| 923 | .seperator { | ||
| 924 | padding-top: 2em; | ||
| 925 | clear: both; | ||
| 926 | } | ||
| 927 | |||
| 928 | #validators { | ||
| 929 | margin-top: 5em; | ||
| 930 | text-align: right; | ||
| 931 | color: #777; | ||
| 932 | } | ||
| 933 | @media print { | ||
| 934 | body { | ||
| 935 | font-size: 8pt; | ||
| 936 | } | ||
| 937 | .noprint { | ||
| 938 | display: none; | ||
| 939 | } | ||
| 940 | } | ||
| 941 | |||
| 942 | |||
| 943 | .tip, | ||
| 944 | .note { | ||
| 945 | background: #f0f0f2; | ||
| 946 | color: #333; | ||
| 947 | padding: 20px; | ||
| 948 | margin: 20px; | ||
| 949 | } | ||
| 950 | |||
| 951 | .tip h3, | ||
| 952 | .note h3 { | ||
| 953 | padding: 0em; | ||
| 954 | margin: 0em; | ||
| 955 | font-size: 2em; | ||
| 956 | font-weight: bold; | ||
| 957 | color: #333; | ||
| 958 | } | ||
| 959 | |||
| 960 | .tip a, | ||
| 961 | .note a { | ||
| 962 | color: #333; | ||
| 963 | text-decoration: underline; | ||
| 964 | } | ||
| 965 | |||
| 966 | .footnote { | ||
| 967 | font-size: small; | ||
| 968 | color: #333; | ||
| 969 | } | ||
| 970 | |||
| 971 | /* Changes the announcement text */ | ||
| 972 | .tip h3, | ||
| 973 | .warning h3, | ||
| 974 | .caution h3, | ||
| 975 | .note h3 { | ||
| 976 | font-size:large; | ||
| 977 | color: #00557D; | ||
| 978 | } | ||
diff --git a/documentation/profile-manual/profile-manual-usage.xml b/documentation/profile-manual/profile-manual-usage.xml new file mode 100644 index 0000000000..5577b1b001 --- /dev/null +++ b/documentation/profile-manual/profile-manual-usage.xml | |||
| @@ -0,0 +1,3685 @@ | |||
| 1 | <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
| 2 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" | ||
| 3 | [<!ENTITY % poky SYSTEM "../poky.ent"> %poky; ] > | ||
| 4 | |||
| 5 | <chapter id='profile-manual-usage'> | ||
| 6 | |||
| 7 | <title>Basic Usage (with examples) for each of the Yocto Tracing Tools</title> | ||
| 8 | |||
| 9 | <para> | ||
| 10 | This chapter presents basic usage examples for each of the tracing | ||
| 11 | tools. | ||
| 12 | </para> | ||
| 13 | |||
| 14 | <section id='profile-manual-perf'> | ||
| 15 | <title>perf</title> | ||
| 16 | |||
| 17 | <para> | ||
| 18 | The 'perf' tool is the profiling and tracing tool that comes | ||
| 19 | bundled with the Linux kernel. | ||
| 20 | </para> | ||
| 21 | |||
| 22 | <para> | ||
| 23 | Don't let the fact that it's part of the kernel fool you into thinking | ||
| 24 | that it's only for tracing and profiling the kernel - you can indeed | ||
| 25 | use it to trace and profile just the kernel, but you can also use it | ||
| 26 | to profile specific applications separately (with or without kernel | ||
| 27 | context), and you can also use it to trace and profile the kernel | ||
| 28 | and all applications on the system simultaneously to gain a system-wide | ||
| 29 | view of what's going on. | ||
| 30 | </para> | ||
| 31 | |||
| 32 | <para> | ||
| 33 | In many ways, perf aims to be a superset of all the tracing and profiling | ||
| 34 | tools available in Linux today, including all the other tools covered | ||
| 35 | in this HOWTO. The past couple of years have seen perf subsume a lot | ||
| 36 | of the functionality of those other tools and, at the same time, those | ||
| 37 | other tools have removed large portions of their previous functionality | ||
| 38 | and replaced it with calls to the equivalent functionality now | ||
| 39 | implemented by the perf subsystem. Extrapolation suggests that at | ||
| 40 | some point those other tools will simply become completely redundant | ||
| 41 | and go away; until then, we'll cover those other tools in these pages | ||
| 42 | and in many cases show how the same things can be accomplished in | ||
| 43 | perf and the other tools when it seems useful to do so. | ||
| 44 | </para> | ||
| 45 | |||
| 46 | <para> | ||
| 47 | The coverage below details some of the most common ways you'll likely | ||
| 48 | want to apply the tool; full documentation can be found either within | ||
| 49 | the tool itself or in the man pages at | ||
| 50 | <ulink url='http://linux.die.net/man/1/perf'>perf(1)</ulink>. | ||
| 51 | </para> | ||
| 52 | |||
| 53 | <section id='perf-setup'> | ||
| 54 | <title>Setup</title> | ||
| 55 | |||
| 56 | <para> | ||
| 57 | For this section, we'll assume you've already performed the basic | ||
| 58 | setup outlined in the General Setup section. | ||
| 59 | </para> | ||
| 60 | |||
| 61 | <para> | ||
| 62 | In particular, you'll get the most mileage out of perf if you | ||
| 63 | profile an image built with INHIBIT_PACKAGE_STRIP = "1" in your | ||
| 64 | local.conf. | ||
| 65 | </para> | ||
| 66 | |||
| 67 | <para> | ||
| 68 | perf runs on the target system for the most part. You can archive | ||
| 69 | profile data and copy it to the host for analysis, but for the | ||
| 70 | rest of this document we assume you've ssh'ed to the target and | ||
| 71 | will be running the perf commands on the target. | ||
| 72 | </para> | ||
| 73 | </section> | ||
| 74 | |||
| 75 | <section id='perf-basic-usage'> | ||
| 76 | <title>Basic Usage</title> | ||
| 77 | |||
| 78 | <para> | ||
| 79 | The perf tool is pretty much self-documenting. To remind yourself | ||
| 80 | of the available commands, simply type 'perf', which will show you | ||
| 81 | basic usage along with the available perf subcommands: | ||
| 82 | <literallayout class='monospaced'> | ||
| 83 | root@crownbay:~# perf | ||
| 84 | |||
| 85 | usage: perf [--version] [--help] COMMAND [ARGS] | ||
| 86 | |||
| 87 | The most commonly used perf commands are: | ||
| 88 | annotate Read perf.data (created by perf record) and display annotated code | ||
| 89 | archive Create archive with object files with build-ids found in perf.data file | ||
| 90 | bench General framework for benchmark suites | ||
| 91 | buildid-cache Manage build-id cache. | ||
| 92 | buildid-list List the buildids in a perf.data file | ||
| 93 | diff Read two perf.data files and display the differential profile | ||
| 94 | evlist List the event names in a perf.data file | ||
| 95 | inject Filter to augment the events stream with additional information | ||
| 96 | kmem Tool to trace/measure kernel memory(slab) properties | ||
| 97 | kvm Tool to trace/measure kvm guest os | ||
| 98 | list List all symbolic event types | ||
| 99 | lock Analyze lock events | ||
| 100 | probe Define new dynamic tracepoints | ||
| 101 | record Run a command and record its profile into perf.data | ||
| 102 | report Read perf.data (created by perf record) and display the profile | ||
| 103 | sched Tool to trace/measure scheduler properties (latencies) | ||
| 104 | script Read perf.data (created by perf record) and display trace output | ||
| 105 | stat Run a command and gather performance counter statistics | ||
| 106 | test Runs sanity tests. | ||
| 107 | timechart Tool to visualize total system behavior during a workload | ||
| 108 | top System profiling tool. | ||
| 109 | |||
| 110 | See 'perf help COMMAND' for more information on a specific command. | ||
| 111 | </literallayout> | ||
| 112 | </para> | ||
| 113 | |||
| 114 | <section id='using-perf-to-do-basic-profiling'> | ||
| 115 | <title>Using perf to do Basic Profiling</title> | ||
| 116 | |||
| 117 | <para> | ||
| 118 | As a simple test case, we'll profile the 'wget' of a fairly large | ||
| 119 | file, which is a minimally interesting case because it has both | ||
| 120 | file and network I/O aspects, and at least in the case of standard | ||
| 121 | Yocto images, it's implemented as part of busybox, so the methods | ||
| 122 | we use to analyze it can be used in a very similar way to the whole | ||
| 123 | host of supported busybox applets in Yocto. | ||
| 124 | <literallayout class='monospaced'> | ||
| 125 | root@crownbay:~# rm linux-2.6.19.2.tar.bz2; \ | ||
| 126 | wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
| 127 | </literallayout> | ||
| 128 | The quickest and easiest way to get some basic overall data about | ||
| 129 | what's going on for a particular workload is to profile it using | ||
| 130 | 'perf stat'. 'perf stat' basically profiles using a few default | ||
| 131 | counters and displays the summed counts at the end of the run: | ||
| 132 | <literallayout class='monospaced'> | ||
| 133 | root@crownbay:~# perf stat wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
| 134 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
| 135 | linux-2.6.19.2.tar.b 100% |***************************************************| 41727k 0:00:00 ETA | ||
| 136 | |||
| 137 | Performance counter stats for 'wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink>': | ||
| 138 | |||
| 139 | 4597.223902 task-clock # 0.077 CPUs utilized | ||
| 140 | 23568 context-switches # 0.005 M/sec | ||
| 141 | 68 CPU-migrations # 0.015 K/sec | ||
| 142 | 241 page-faults # 0.052 K/sec | ||
| 143 | 3045817293 cycles # 0.663 GHz | ||
| 144 | <not supported> stalled-cycles-frontend | ||
| 145 | <not supported> stalled-cycles-backend | ||
| 146 | 858909167 instructions # 0.28 insns per cycle | ||
| 147 | 165441165 branches # 35.987 M/sec | ||
| 148 | 19550329 branch-misses # 11.82% of all branches | ||
| 149 | |||
| 150 | 59.836627620 seconds time elapsed | ||
| 151 | </literallayout> | ||
| 152 | Many times such a simple-minded test doesn't yield much of | ||
| 153 | interest, but sometimes it does (see Real-world Yocto bug | ||
| 154 | (slow loop-mounted write speed)). | ||
| 155 | </para> | ||
| 156 | |||
| 157 | <para> | ||
| 158 | Also, note that 'perf stat' isn't restricted to a fixed set of | ||
| 159 | counters - basically any event listed in the output of 'perf list' | ||
| 160 | can be tallied by 'perf stat'. For example, suppose we wanted to | ||
| 161 | see a summary of all the events related to kernel memory | ||
| 162 | allocation/freeing along with cache hits and misses: | ||
| 163 | <literallayout class='monospaced'> | ||
| 164 | root@crownbay:~# perf stat -e kmem:* -e cache-references -e cache-misses wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
| 165 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
| 166 | linux-2.6.19.2.tar.b 100% |***************************************************| 41727k 0:00:00 ETA | ||
| 167 | |||
| 168 | Performance counter stats for 'wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink>': | ||
| 169 | |||
| 170 | 5566 kmem:kmalloc | ||
| 171 | 125517 kmem:kmem_cache_alloc | ||
| 172 | 0 kmem:kmalloc_node | ||
| 173 | 0 kmem:kmem_cache_alloc_node | ||
| 174 | 34401 kmem:kfree | ||
| 175 | 69920 kmem:kmem_cache_free | ||
| 176 | 133 kmem:mm_page_free | ||
| 177 | 41 kmem:mm_page_free_batched | ||
| 178 | 11502 kmem:mm_page_alloc | ||
| 179 | 11375 kmem:mm_page_alloc_zone_locked | ||
| 180 | 0 kmem:mm_page_pcpu_drain | ||
| 181 | 0 kmem:mm_page_alloc_extfrag | ||
| 182 | 66848602 cache-references | ||
| 183 | 2917740 cache-misses # 4.365 % of all cache refs | ||
| 184 | |||
| 185 | 44.831023415 seconds time elapsed | ||
| 186 | </literallayout> | ||
| 187 | So 'perf stat' gives us a nice easy way to get a quick overview of | ||
| 188 | what might be happening for a set of events, but normally we'd | ||
| 189 | need a little more detail in order to understand what's going on | ||
| 190 | in a way that we can act on in a useful way. | ||
| 191 | </para> | ||
| 192 | |||
| 193 | <para> | ||
| 194 | To dive down into a next level of detail, we can use 'perf | ||
| 195 | record'/'perf report' which will collect profiling data and | ||
| 196 | present it to us using an interactive text-based UI (or | ||
| 197 | simply as text if we specify --stdio to 'perf report'). | ||
| 198 | </para> | ||
| 199 | |||
| 200 | <para> | ||
| 201 | As our first attempt at profiling this workload, we'll simply | ||
| 202 | run 'perf record', handing it the workload we want to profile | ||
| 203 | (everything after 'perf record' and any perf options we hand | ||
| 204 | it - here none - will be executed in a new shell). perf collects | ||
| 205 | samples until the process exits and records them in a file named | ||
| 206 | 'perf.data' in the current working directory. | ||
| 207 | <literallayout class='monospaced'> | ||
| 208 | root@crownbay:~# perf record wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
| 209 | |||
| 210 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
| 211 | linux-2.6.19.2.tar.b 100% |************************************************| 41727k 0:00:00 ETA | ||
| 212 | [ perf record: Woken up 1 times to write data ] | ||
| 213 | [ perf record: Captured and wrote 0.176 MB perf.data (~7700 samples) ] | ||
| 214 | </literallayout> | ||
| 215 | To see the results in a 'text-based UI' (tui), simply run | ||
| 216 | 'perf report', which will read the perf.data file in the current | ||
| 217 | working directory and display the results in an interactive UI: | ||
| 218 | <literallayout class='monospaced'> | ||
| 219 | root@crownbay:~# perf report | ||
| 220 | </literallayout> | ||
| 221 | </para> | ||
| 222 | |||
| 223 | <para> | ||
| 224 | <imagedata fileref="figures/perf-wget-flat-stripped.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 225 | </para> | ||
| 226 | |||
| 227 | <para> | ||
| 228 | The above screenshot displays a 'flat' profile, one entry for | ||
| 229 | each 'bucket' corresponding to the functions that were profiled | ||
| 230 | during the profiling run, ordered from the most popular to the | ||
| 231 | least (perf has options to sort in various orders and keys as | ||
| 232 | well as display entries only above a certain threshold and so | ||
| 233 | on - see the perf documentation for details). Note that this | ||
| 234 | includes both userspace functions (entries containing a [.]) and | ||
| 235 | kernel functions accounted to the process (entries containing | ||
| 236 | a [k]). (perf has command-line modifiers that can be used to | ||
| 237 | restrict the profiling to kernel or userspace, among others). | ||
| 238 | </para> | ||
| 239 | |||
| 240 | <para> | ||
| 241 | Notice also that the above report shows an entry for 'busybox', | ||
| 242 | which is the executable that implements 'wget' in Yocto, but that | ||
| 243 | instead of a useful function name in that entry, it displays | ||
| 244 | a not-so-friendly hex value instead. The steps below will show | ||
| 245 | how to fix that problem. | ||
| 246 | </para> | ||
| 247 | |||
| 248 | <para> | ||
| 249 | Before we do that, however, let's try running a different profile, | ||
| 250 | one which shows something a little more interesting. The only | ||
| 251 | difference between the new profile and the previous one is that | ||
| 252 | we'll add the -g option, which will record not just the address | ||
| 253 | of a sampled function, but the entire callchain to the sampled | ||
| 254 | function as well: | ||
| 255 | <literallayout class='monospaced'> | ||
| 256 | root@crownbay:~# perf record -g wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
| 257 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
| 258 | linux-2.6.19.2.tar.b 100% |************************************************| 41727k 0:00:00 ETA | ||
| 259 | [ perf record: Woken up 3 times to write data ] | ||
| 260 | [ perf record: Captured and wrote 0.652 MB perf.data (~28476 samples) ] | ||
| 261 | |||
| 262 | |||
| 263 | root@crownbay:~# perf report | ||
| 264 | </literallayout> | ||
| 265 | </para> | ||
| 266 | |||
| 267 | <para> | ||
| 268 | <imagedata fileref="figures/perf-wget-g-copy-to-user-expanded-stripped.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 269 | </para> | ||
| 270 | |||
| 271 | <para> | ||
| 272 | Using the callgraph view, we can actually see not only which | ||
| 273 | functions took the most time, but we can also see a summary of | ||
| 274 | how those functions were called and learn something about how the | ||
| 275 | program interacts with the kernel in the process. | ||
| 276 | </para> | ||
| 277 | |||
| 278 | <para> | ||
| 279 | Notice that each entry in the above screenshot now contains a '+' | ||
| 280 | on the left-hand side. This means that we can expand the entry and | ||
| 281 | drill down into the callchains that feed into that entry. | ||
| 282 | Pressing 'enter' on any one of them will expand the callchain | ||
| 283 | (you can also press 'E' to expand them all at the same time or 'C' | ||
| 284 | to collapse them all). | ||
| 285 | </para> | ||
| 286 | |||
| 287 | <para> | ||
| 288 | In the screenshot above, we've toggled the __copy_to_user_ll() | ||
| 289 | entry and several subnodes all the way down. This lets us see | ||
| 290 | which callchains contributed to the profiled __copy_to_user_ll() | ||
| 291 | function which contributed 1.77% to the total profile. | ||
| 292 | </para> | ||
| 293 | |||
| 294 | <para> | ||
| 295 | As a bit of background explanation for these callchains, think | ||
| 296 | about what happens at a high level when you run wget to get a file | ||
| 297 | out on the network. Basically what happens is that the data comes | ||
| 298 | into the kernel via the network connection (socket) and is passed | ||
| 299 | to the userspace program 'wget' (which is actually a part of | ||
| 300 | busybox, but that's not important for now), which takes the buffers | ||
| 301 | the kernel passes to it and writes them to a disk file to save them. | ||
| 302 | </para> | ||
| 303 | |||
| 304 | <para> | ||
| 305 | The part of this process that we're looking at in the above call | ||
| 306 | stacks is the part where the kernel passes the data it's read from | ||
| 307 | the socket down to wget, i.e. a copy-to-user. | ||
| 308 | </para> | ||
| 309 | |||
| 310 | <para> | ||
| 311 | Notice also that there's a case where a hex value is displayed | ||
| 312 | in the callstack instead of a symbol, here in the expanded | ||
| 313 | sys_clock_gettime() function. Later we'll see it resolve to a | ||
| 314 | userspace function call in busybox. | ||
| 315 | </para> | ||
| 316 | |||
| 317 | <para> | ||
| 318 | <imagedata fileref="figures/perf-wget-g-copy-from-user-expanded-stripped.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 319 | </para> | ||
| 320 | |||
| 321 | <para> | ||
| 322 | The above screenshot shows the other half of the journey for the | ||
| 323 | data - from the wget program's userspace buffers to disk. To get | ||
| 324 | the buffers to disk, the wget program issues a write(2), which | ||
| 325 | does a copy-from-user to the kernel, which then takes care via | ||
| 326 | some circuitous path (probably also present somewhere in the | ||
| 327 | profile data), to get it safely to disk. | ||
| 328 | </para> | ||
| 329 | |||
| 330 | <para> | ||
| 331 | Now that we've seen the basic layout of the profile data and the | ||
| 332 | basics of how to extract useful information out of it, let's get | ||
| 333 | back to the task at hand and see if we can get some basic idea | ||
| 334 | about where the time is spent in the program we're profiling, | ||
| 335 | wget. Remember that wget is actually implemented as an applet | ||
| 336 | in busybox, so while the process name is 'wget', the executable | ||
| 337 | we're actually interested in is busybox. So let's expand the | ||
| 338 | first entry containing busybox: | ||
| 339 | </para> | ||
| 340 | |||
| 341 | <para> | ||
| 342 | <imagedata fileref="figures/perf-wget-busybox-expanded-stripped.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 343 | </para> | ||
| 344 | |||
| 345 | <para> | ||
| 346 | Again, before we expanded we saw that the function was labeled | ||
| 347 | with a hex value instead of a symbol, unlike most of the kernel | ||
| 348 | entries. Expanding the busybox entry doesn't make it any better. | ||
| 349 | </para> | ||
| 350 | |||
| 351 | <para> | ||
| 352 | The problem is that perf can't find the symbol information for the | ||
| 353 | busybox binary, which is actually stripped out by the Yocto build | ||
| 354 | system. | ||
| 355 | </para> | ||
| 356 | |||
| 357 | <para> | ||
| 358 | One way around that is to put the following in your local.conf | ||
| 359 | when you build the image: | ||
| 360 | <literallayout class='monospaced'> | ||
| 361 | INHIBIT_PACKAGE_STRIP = "1" | ||
| 362 | </literallayout> | ||
| 363 | However, we already have an image with the binaries stripped, | ||
| 364 | so what can we do to get perf to resolve the symbols? Basically | ||
| 365 | we need to install the debuginfo for the busybox package. | ||
| 366 | </para> | ||
| 367 | |||
| 368 | <para> | ||
| 369 | To generate the debug info for the packages in the image, we can | ||
| 370 | add dbg-pkgs to EXTRA_IMAGE_FEATURES in local.conf. For example: | ||
| 371 | <literallayout class='monospaced'> | ||
| 372 | EXTRA_IMAGE_FEATURES = "debug-tweaks tools-profile dbg-pkgs" | ||
| 373 | </literallayout> | ||
| 374 | Additionally, in order to generate the type of debuginfo that | ||
| 375 | perf understands, we also need to add the following to local.conf: | ||
| 376 | <literallayout class='monospaced'> | ||
| 377 | PACKAGE_DEBUG_SPLIT_STYLE = 'debug-file-directory' | ||
| 378 | </literallayout> | ||
| 379 | Once we've done that, we can install the debuginfo for busybox. | ||
| 380 | The debug packages once built can be found in | ||
| 381 | build/tmp/deploy/rpm/* on the host system. Find the | ||
| 382 | busybox-dbg-...rpm file and copy it to the target. For example: | ||
| 383 | <literallayout class='monospaced'> | ||
| 384 | [trz@empanada core2]$ scp /home/trz/yocto/crownbay-tracing-dbg/build/tmp/deploy/rpm/core2_32/busybox-dbg-1.20.2-r2.core2_32.rpm root@192.168.1.31: | ||
| 385 | root@192.168.1.31's password: | ||
| 386 | busybox-dbg-1.20.2-r2.core2_32.rpm 100% 1826KB 1.8MB/s 00:01 | ||
| 387 | </literallayout> | ||
| 388 | Now install the debug rpm on the target: | ||
| 389 | <literallayout class='monospaced'> | ||
| 390 | root@crownbay:~# rpm -i busybox-dbg-1.20.2-r2.core2_32.rpm | ||
| 391 | </literallayout> | ||
| 392 | Now that the debuginfo is installed, we see that the busybox | ||
| 393 | entries now display their functions symbolically: | ||
| 394 | </para> | ||
| 395 | |||
| 396 | <para> | ||
| 397 | <imagedata fileref="figures/perf-wget-busybox-debuginfo.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 398 | </para> | ||
| 399 | |||
| 400 | <para> | ||
| 401 | If we expand one of the entries and press 'enter' on a leaf node, | ||
| 402 | we're presented with a menu of actions we can take to get more | ||
| 403 | information related to that entry: | ||
| 404 | </para> | ||
| 405 | |||
| 406 | <para> | ||
| 407 | <imagedata fileref="figures/perf-wget-busybox-dso-zoom-menu.png" width="6in" depth="2in" align="center" scalefit="1" /> | ||
| 408 | </para> | ||
| 409 | |||
| 410 | <para> | ||
| 411 | One of these actions allows us to show a view that displays a | ||
| 412 | busybox-centric view of the profiled functions (in this case we've | ||
| 413 | also expanded all the nodes using the 'E' key): | ||
| 414 | </para> | ||
| 415 | |||
| 416 | <para> | ||
| 417 | <imagedata fileref="figures/perf-wget-busybox-dso-zoom.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 418 | </para> | ||
| 419 | |||
| 420 | <para> | ||
| 421 | Finally, we can see that now that the busybox debuginfo is | ||
| 422 | installed, the unresolved symbol in the sys_clock_gettime() | ||
| 423 | entry mentioned previously is now resolved, | ||
| 424 | and shows that the sys_clock_gettime system call that was the | ||
| 425 | source of 6.75% of the copy-to-user overhead was initiated by | ||
| 426 | the handle_input() busybox function: | ||
| 427 | </para> | ||
| 428 | |||
| 429 | <para> | ||
| 430 | <imagedata fileref="figures/perf-wget-g-copy-to-user-expanded-debuginfo.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 431 | </para> | ||
| 432 | |||
| 433 | <para> | ||
| 434 | At the lowest level of detail, we can dive down to the assembly | ||
| 435 | level and see which instructions caused the most overhead in a | ||
| 436 | function. Pressing 'enter' on the 'udhcpc_main' function, we're | ||
| 437 | again presented with a menu: | ||
| 438 | </para> | ||
| 439 | |||
| 440 | <para> | ||
| 441 | <imagedata fileref="figures/perf-wget-busybox-annotate-menu.png" width="6in" depth="2in" align="center" scalefit="1" /> | ||
| 442 | </para> | ||
| 443 | |||
| 444 | <para> | ||
| 445 | Selecting 'Annotate udhcpc_main', we get a detailed listing of | ||
| 446 | percentages by instruction for the udhcpc_main function. From the | ||
| 447 | display, we can see that over 50% of the time spent in this | ||
| 448 | function is taken up by a couple of tests and the move of a | ||
| 449 | constant (1) to a register: | ||
| 450 | </para> | ||
| 451 | |||
| 452 | <para> | ||
| 453 | <imagedata fileref="figures/perf-wget-busybox-annotate-udhcpc.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 454 | </para> | ||
| 455 | |||
| 456 | <para> | ||
| 457 | As a segue into tracing, let's try another profile using a | ||
| 458 | different counter, something other than the default 'cycles'. | ||
| 459 | </para> | ||
| 460 | |||
| 461 | <para> | ||
| 462 | The tracing and profiling infrastructure in Linux has become | ||
| 463 | unified in a way that allows us to use the same tool with a | ||
| 464 | completely different set of counters, not just the standard | ||
| 465 | hardware counters that traditional tools have had to restrict | ||
| 466 | themselves to (of course the traditional tools can also make use | ||
| 467 | of the expanded possibilities now available to them, and in some | ||
| 468 | cases have, as mentioned previously). | ||
| 469 | </para> | ||
| 470 | |||
| 471 | <para> | ||
| 472 | We can get a list of the available events that can be used to | ||
| 473 | profile a workload via 'perf list': | ||
| 474 | <literallayout class='monospaced'> | ||
| 475 | root@crownbay:~# perf list | ||
| 476 | |||
| 477 | List of pre-defined events (to be used in -e): | ||
| 478 | cpu-cycles OR cycles [Hardware event] | ||
| 479 | stalled-cycles-frontend OR idle-cycles-frontend [Hardware event] | ||
| 480 | stalled-cycles-backend OR idle-cycles-backend [Hardware event] | ||
| 481 | instructions [Hardware event] | ||
| 482 | cache-references [Hardware event] | ||
| 483 | cache-misses [Hardware event] | ||
| 484 | branch-instructions OR branches [Hardware event] | ||
| 485 | branch-misses [Hardware event] | ||
| 486 | bus-cycles [Hardware event] | ||
| 487 | ref-cycles [Hardware event] | ||
| 488 | |||
| 489 | cpu-clock [Software event] | ||
| 490 | task-clock [Software event] | ||
| 491 | page-faults OR faults [Software event] | ||
| 492 | minor-faults [Software event] | ||
| 493 | major-faults [Software event] | ||
| 494 | context-switches OR cs [Software event] | ||
| 495 | cpu-migrations OR migrations [Software event] | ||
| 496 | alignment-faults [Software event] | ||
| 497 | emulation-faults [Software event] | ||
| 498 | |||
| 499 | L1-dcache-loads [Hardware cache event] | ||
| 500 | L1-dcache-load-misses [Hardware cache event] | ||
| 501 | L1-dcache-prefetch-misses [Hardware cache event] | ||
| 502 | L1-icache-loads [Hardware cache event] | ||
| 503 | L1-icache-load-misses [Hardware cache event] | ||
| 504 | . | ||
| 505 | . | ||
| 506 | . | ||
| 507 | rNNN [Raw hardware event descriptor] | ||
| 508 | cpu/t1=v1[,t2=v2,t3 ...]/modifier [Raw hardware event descriptor] | ||
| 509 | (see 'perf list --help' on how to encode it) | ||
| 510 | |||
| 511 | mem:<addr>[:access] [Hardware breakpoint] | ||
| 512 | |||
| 513 | sunrpc:rpc_call_status [Tracepoint event] | ||
| 514 | sunrpc:rpc_bind_status [Tracepoint event] | ||
| 515 | sunrpc:rpc_connect_status [Tracepoint event] | ||
| 516 | sunrpc:rpc_task_begin [Tracepoint event] | ||
| 517 | skb:kfree_skb [Tracepoint event] | ||
| 518 | skb:consume_skb [Tracepoint event] | ||
| 519 | skb:skb_copy_datagram_iovec [Tracepoint event] | ||
| 520 | net:net_dev_xmit [Tracepoint event] | ||
| 521 | net:net_dev_queue [Tracepoint event] | ||
| 522 | net:netif_receive_skb [Tracepoint event] | ||
| 523 | net:netif_rx [Tracepoint event] | ||
| 524 | napi:napi_poll [Tracepoint event] | ||
| 525 | sock:sock_rcvqueue_full [Tracepoint event] | ||
| 526 | sock:sock_exceed_buf_limit [Tracepoint event] | ||
| 527 | udp:udp_fail_queue_rcv_skb [Tracepoint event] | ||
| 528 | hda:hda_send_cmd [Tracepoint event] | ||
| 529 | hda:hda_get_response [Tracepoint event] | ||
| 530 | hda:hda_bus_reset [Tracepoint event] | ||
| 531 | scsi:scsi_dispatch_cmd_start [Tracepoint event] | ||
| 532 | scsi:scsi_dispatch_cmd_error [Tracepoint event] | ||
| 533 | scsi:scsi_eh_wakeup [Tracepoint event] | ||
| 534 | drm:drm_vblank_event [Tracepoint event] | ||
| 535 | drm:drm_vblank_event_queued [Tracepoint event] | ||
| 536 | drm:drm_vblank_event_delivered [Tracepoint event] | ||
| 537 | random:mix_pool_bytes [Tracepoint event] | ||
| 538 | random:mix_pool_bytes_nolock [Tracepoint event] | ||
| 539 | random:credit_entropy_bits [Tracepoint event] | ||
| 540 | gpio:gpio_direction [Tracepoint event] | ||
| 541 | gpio:gpio_value [Tracepoint event] | ||
| 542 | block:block_rq_abort [Tracepoint event] | ||
| 543 | block:block_rq_requeue [Tracepoint event] | ||
| 544 | block:block_rq_issue [Tracepoint event] | ||
| 545 | block:block_bio_bounce [Tracepoint event] | ||
| 546 | block:block_bio_complete [Tracepoint event] | ||
| 547 | block:block_bio_backmerge [Tracepoint event] | ||
| 548 | . | ||
| 549 | . | ||
| 550 | writeback:writeback_wake_thread [Tracepoint event] | ||
| 551 | writeback:writeback_wake_forker_thread [Tracepoint event] | ||
| 552 | writeback:writeback_bdi_register [Tracepoint event] | ||
| 553 | . | ||
| 554 | . | ||
| 555 | writeback:writeback_single_inode_requeue [Tracepoint event] | ||
| 556 | writeback:writeback_single_inode [Tracepoint event] | ||
| 557 | kmem:kmalloc [Tracepoint event] | ||
| 558 | kmem:kmem_cache_alloc [Tracepoint event] | ||
| 559 | kmem:mm_page_alloc [Tracepoint event] | ||
| 560 | kmem:mm_page_alloc_zone_locked [Tracepoint event] | ||
| 561 | kmem:mm_page_pcpu_drain [Tracepoint event] | ||
| 562 | kmem:mm_page_alloc_extfrag [Tracepoint event] | ||
| 563 | vmscan:mm_vmscan_kswapd_sleep [Tracepoint event] | ||
| 564 | vmscan:mm_vmscan_kswapd_wake [Tracepoint event] | ||
| 565 | vmscan:mm_vmscan_wakeup_kswapd [Tracepoint event] | ||
| 566 | vmscan:mm_vmscan_direct_reclaim_begin [Tracepoint event] | ||
| 567 | . | ||
| 568 | . | ||
| 569 | module:module_get [Tracepoint event] | ||
| 570 | module:module_put [Tracepoint event] | ||
| 571 | module:module_request [Tracepoint event] | ||
| 572 | sched:sched_kthread_stop [Tracepoint event] | ||
| 573 | sched:sched_wakeup [Tracepoint event] | ||
| 574 | sched:sched_wakeup_new [Tracepoint event] | ||
| 575 | sched:sched_process_fork [Tracepoint event] | ||
| 576 | sched:sched_process_exec [Tracepoint event] | ||
| 577 | sched:sched_stat_runtime [Tracepoint event] | ||
| 578 | rcu:rcu_utilization [Tracepoint event] | ||
| 579 | workqueue:workqueue_queue_work [Tracepoint event] | ||
| 580 | workqueue:workqueue_execute_end [Tracepoint event] | ||
| 581 | signal:signal_generate [Tracepoint event] | ||
| 582 | signal:signal_deliver [Tracepoint event] | ||
| 583 | timer:timer_init [Tracepoint event] | ||
| 584 | timer:timer_start [Tracepoint event] | ||
| 585 | timer:hrtimer_cancel [Tracepoint event] | ||
| 586 | timer:itimer_state [Tracepoint event] | ||
| 587 | timer:itimer_expire [Tracepoint event] | ||
| 588 | irq:irq_handler_entry [Tracepoint event] | ||
| 589 | irq:irq_handler_exit [Tracepoint event] | ||
| 590 | irq:softirq_entry [Tracepoint event] | ||
| 591 | irq:softirq_exit [Tracepoint event] | ||
| 592 | irq:softirq_raise [Tracepoint event] | ||
| 593 | printk:console [Tracepoint event] | ||
| 594 | task:task_newtask [Tracepoint event] | ||
| 595 | task:task_rename [Tracepoint event] | ||
| 596 | syscalls:sys_enter_socketcall [Tracepoint event] | ||
| 597 | syscalls:sys_exit_socketcall [Tracepoint event] | ||
| 598 | . | ||
| 599 | . | ||
| 600 | . | ||
| 601 | syscalls:sys_enter_unshare [Tracepoint event] | ||
| 602 | syscalls:sys_exit_unshare [Tracepoint event] | ||
| 603 | raw_syscalls:sys_enter [Tracepoint event] | ||
| 604 | raw_syscalls:sys_exit [Tracepoint event] | ||
| 605 | </literallayout> | ||
| 606 | </para> | ||
| 607 | |||
| 608 | <informalexample> | ||
| 609 | <emphasis>Tying it Together:</emphasis> These are exactly the same set of events defined | ||
| 610 | by the trace event subsystem and exposed by | ||
| 611 | ftrace/tracecmd/kernelshark as files in | ||
| 612 | /sys/kernel/debug/tracing/events, by SystemTap as | ||
| 613 | kernel.trace("tracepoint_name") and (partially) accessed by LTTng. | ||
| 614 | </informalexample> | ||
| 615 | |||
| 616 | <para> | ||
| 617 | Only a subset of these would be of interest to us when looking at | ||
| 618 | this workload, so let's choose the most likely subsystems | ||
| 619 | (identified by the string before the colon in the Tracepoint events) | ||
| 620 | and do a 'perf stat' run using only those wildcarded subsystems: | ||
| 621 | <literallayout class='monospaced'> | ||
| 622 | root@crownbay:~# perf stat -e skb:* -e net:* -e napi:* -e sched:* -e workqueue:* -e irq:* -e syscalls:* wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
| 623 | Performance counter stats for 'wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink>': | ||
| 624 | |||
| 625 | 23323 skb:kfree_skb | ||
| 626 | 0 skb:consume_skb | ||
| 627 | 49897 skb:skb_copy_datagram_iovec | ||
| 628 | 6217 net:net_dev_xmit | ||
| 629 | 6217 net:net_dev_queue | ||
| 630 | 7962 net:netif_receive_skb | ||
| 631 | 2 net:netif_rx | ||
| 632 | 8340 napi:napi_poll | ||
| 633 | 0 sched:sched_kthread_stop | ||
| 634 | 0 sched:sched_kthread_stop_ret | ||
| 635 | 3749 sched:sched_wakeup | ||
| 636 | 0 sched:sched_wakeup_new | ||
| 637 | 0 sched:sched_switch | ||
| 638 | 29 sched:sched_migrate_task | ||
| 639 | 0 sched:sched_process_free | ||
| 640 | 1 sched:sched_process_exit | ||
| 641 | 0 sched:sched_wait_task | ||
| 642 | 0 sched:sched_process_wait | ||
| 643 | 0 sched:sched_process_fork | ||
| 644 | 1 sched:sched_process_exec | ||
| 645 | 0 sched:sched_stat_wait | ||
| 646 | 2106519415641 sched:sched_stat_sleep | ||
| 647 | 0 sched:sched_stat_iowait | ||
| 648 | 147453613 sched:sched_stat_blocked | ||
| 649 | 12903026955 sched:sched_stat_runtime | ||
| 650 | 0 sched:sched_pi_setprio | ||
| 651 | 3574 workqueue:workqueue_queue_work | ||
| 652 | 3574 workqueue:workqueue_activate_work | ||
| 653 | 0 workqueue:workqueue_execute_start | ||
| 654 | 0 workqueue:workqueue_execute_end | ||
| 655 | 16631 irq:irq_handler_entry | ||
| 656 | 16631 irq:irq_handler_exit | ||
| 657 | 28521 irq:softirq_entry | ||
| 658 | 28521 irq:softirq_exit | ||
| 659 | 28728 irq:softirq_raise | ||
| 660 | 1 syscalls:sys_enter_sendmmsg | ||
| 661 | 1 syscalls:sys_exit_sendmmsg | ||
| 662 | 0 syscalls:sys_enter_recvmmsg | ||
| 663 | 0 syscalls:sys_exit_recvmmsg | ||
| 664 | 14 syscalls:sys_enter_socketcall | ||
| 665 | 14 syscalls:sys_exit_socketcall | ||
| 666 | . | ||
| 667 | . | ||
| 668 | . | ||
| 669 | 16965 syscalls:sys_enter_read | ||
| 670 | 16965 syscalls:sys_exit_read | ||
| 671 | 12854 syscalls:sys_enter_write | ||
| 672 | 12854 syscalls:sys_exit_write | ||
| 673 | . | ||
| 674 | . | ||
| 675 | . | ||
| 676 | |||
| 677 | 58.029710972 seconds time elapsed | ||
| 678 | </literallayout> | ||
| 679 | Let's pick one of these tracepoints and tell perf to do a profile | ||
| 680 | using it as the sampling event: | ||
| 681 | <literallayout class='monospaced'> | ||
| 682 | root@crownbay:~# perf record -g -e sched:sched_wakeup wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
| 683 | </literallayout> | ||
| 684 | </para> | ||
| 685 | |||
| 686 | <para> | ||
| 687 | <imagedata fileref="figures/sched-wakeup-profile.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 688 | </para> | ||
| 689 | |||
| 690 | <para> | ||
| 691 | The screenshot above shows the results of running a profile using | ||
| 692 | the sched:sched_wakeup tracepoint, which shows the relative costs of | ||
| 693 | various paths to sched_wakeup (note that sched_wakeup is the | ||
| 694 | name of the tracepoint - it's actually defined just inside | ||
| 695 | ttwu_do_wakeup(), which accounts for the function name actually | ||
| 696 | displayed in the profile): | ||
| 697 | <literallayout class='monospaced'> | ||
| 698 | /* | ||
| 699 | * Mark the task runnable and perform wakeup-preemption. | ||
| 700 | */ | ||
| 701 | static void | ||
| 702 | ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) | ||
| 703 | { | ||
| 704 | trace_sched_wakeup(p, true); | ||
| 705 | . | ||
| 706 | . | ||
| 707 | . | ||
| 708 | } | ||
| 709 | </literallayout> | ||
| 710 | A couple of the more interesting callchains are expanded and | ||
| 711 | displayed above, basically some network receive paths that | ||
| 712 | presumably end up waking up wget (busybox) when network data is | ||
| 713 | ready. | ||
| 714 | </para> | ||
| 715 | |||
| 716 | <para> | ||
| 717 | Note that because tracepoints are normally used for tracing, | ||
| 718 | the default sampling period for tracepoints is 1 i.e. for | ||
| 719 | tracepoints perf will sample on every event occurrence (this | ||
| 720 | can be changed using the -c option). This is in contrast to | ||
| 721 | hardware counters such as for example the default 'cycles' | ||
| 722 | hardware counter used for normal profiling, where sampling | ||
| 723 | periods are much higher (in the thousands) because profiling should | ||
| 724 | have as low an overhead as possible and sampling on every cycle | ||
| 725 | would be prohibitively expensive. | ||
| 726 | </para> | ||
| 727 | </section> | ||
| 728 | |||
| 729 | <section id='using-perf-to-do-basic-tracing'> | ||
| 730 | <title>Using perf to do Basic Tracing</title> | ||
| 731 | |||
| 732 | <para> | ||
| 733 | Profiling is a great tool for solving many problems or for | ||
| 734 | getting a high-level view of what's going on with a workload or | ||
| 735 | across the system. It is however by definition an approximation, | ||
| 736 | as suggested by the most prominent word associated with it, | ||
| 737 | 'sampling'. On the one hand, it allows a representative picture of | ||
| 738 | what's going on in the system to be cheaply taken, but on the other | ||
| 739 | hand, that cheapness limits its utility when that data suggests a | ||
| 740 | need to 'dive down' more deeply to discover what's really going | ||
| 741 | on. In such cases, the only way to see what's really going on is | ||
| 742 | to be able to look at (or summarize more intelligently) the | ||
| 743 | individual steps that go into the higher-level behavior exposed | ||
| 744 | by the coarse-grained profiling data. | ||
| 745 | </para> | ||
| 746 | |||
| 747 | <para> | ||
| 748 | As a concrete example, we can trace all the events we think might | ||
| 749 | be applicable to our workload: | ||
| 750 | <literallayout class='monospaced'> | ||
| 751 | root@crownbay:~# perf record -g -e skb:* -e net:* -e napi:* -e sched:sched_switch -e sched:sched_wakeup -e irq:* \ | ||
| 752 | -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write \ | ||
| 753 | wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
| 754 | </literallayout> | ||
| 755 | We can look at the raw trace output using 'perf script' with no | ||
| 756 | arguments: | ||
| 757 | <literallayout class='monospaced'> | ||
| 758 | root@crownbay:~# perf script | ||
| 759 | |||
| 760 | perf 1262 [000] 11624.857082: sys_exit_read: 0x0 | ||
| 761 | perf 1262 [000] 11624.857193: sched_wakeup: comm=migration/0 pid=6 prio=0 success=1 target_cpu=000 | ||
| 762 | wget 1262 [001] 11624.858021: softirq_raise: vec=1 [action=TIMER] | ||
| 763 | wget 1262 [001] 11624.858074: softirq_entry: vec=1 [action=TIMER] | ||
| 764 | wget 1262 [001] 11624.858081: softirq_exit: vec=1 [action=TIMER] | ||
| 765 | wget 1262 [001] 11624.858166: sys_enter_read: fd: 0x0003, buf: 0xbf82c940, count: 0x0200 | ||
| 766 | wget 1262 [001] 11624.858177: sys_exit_read: 0x200 | ||
| 767 | wget 1262 [001] 11624.858878: kfree_skb: skbaddr=0xeb248d80 protocol=0 location=0xc15a5308 | ||
| 768 | wget 1262 [001] 11624.858945: kfree_skb: skbaddr=0xeb248000 protocol=0 location=0xc15a5308 | ||
| 769 | wget 1262 [001] 11624.859020: softirq_raise: vec=1 [action=TIMER] | ||
| 770 | wget 1262 [001] 11624.859076: softirq_entry: vec=1 [action=TIMER] | ||
| 771 | wget 1262 [001] 11624.859083: softirq_exit: vec=1 [action=TIMER] | ||
| 772 | wget 1262 [001] 11624.859167: sys_enter_read: fd: 0x0003, buf: 0xb7720000, count: 0x0400 | ||
| 773 | wget 1262 [001] 11624.859192: sys_exit_read: 0x1d7 | ||
| 774 | wget 1262 [001] 11624.859228: sys_enter_read: fd: 0x0003, buf: 0xb7720000, count: 0x0400 | ||
| 775 | wget 1262 [001] 11624.859233: sys_exit_read: 0x0 | ||
| 776 | wget 1262 [001] 11624.859573: sys_enter_read: fd: 0x0003, buf: 0xbf82c580, count: 0x0200 | ||
| 777 | wget 1262 [001] 11624.859584: sys_exit_read: 0x200 | ||
| 778 | wget 1262 [001] 11624.859864: sys_enter_read: fd: 0x0003, buf: 0xb7720000, count: 0x0400 | ||
| 779 | wget 1262 [001] 11624.859888: sys_exit_read: 0x400 | ||
| 780 | wget 1262 [001] 11624.859935: sys_enter_read: fd: 0x0003, buf: 0xb7720000, count: 0x0400 | ||
| 781 | wget 1262 [001] 11624.859944: sys_exit_read: 0x400 | ||
| 782 | </literallayout> | ||
| 783 | This gives us a detailed timestamped sequence of events that | ||
| 784 | occurred within the workload with respect to those events. | ||
| 785 | </para> | ||
| 786 | |||
| 787 | <para> | ||
| 788 | In many ways, profiling can be viewed as a subset of tracing - | ||
| 789 | theoretically, if you have a set of trace events that's sufficient | ||
| 790 | to capture all the important aspects of a workload, you can derive | ||
| 791 | any of the results or views that a profiling run can. | ||
| 792 | </para> | ||
| 793 | |||
| 794 | <para> | ||
| 795 | Another aspect of traditional profiling is that while powerful in | ||
| 796 | many ways, it's limited by the granularity of the underlying data. | ||
| 797 | Profiling tools offer various ways of sorting and presenting the | ||
| 798 | sample data, which make it much more useful and amenable to user | ||
| 799 | experimentation, but in the end it can't be used in an open-ended | ||
| 800 | way to extract data that just isn't present as a consequence of | ||
| 801 | the fact that conceptually, most of it has been thrown away. | ||
| 802 | </para> | ||
| 803 | |||
| 804 | <para> | ||
| 805 | Full-blown detailed tracing data does however offer the opportunity | ||
| 806 | to manipulate and present the information collected during a | ||
| 807 | tracing run in an infinite variety of ways. | ||
| 808 | </para> | ||
| 809 | |||
| 810 | <para> | ||
| 811 | Another way to look at it is that there are only so many ways that | ||
| 812 | the 'primitive' counters can be used on their own to generate | ||
| 813 | interesting output; to get anything more complicated than simple | ||
| 814 | counts requires some amount of additional logic, which is typically | ||
| 815 | very specific to the problem at hand. For example, if we wanted to | ||
| 816 | make use of a 'counter' that maps to the value of the time | ||
| 817 | difference between when a process was scheduled to run on a | ||
| 818 | processor and the time it actually ran, we wouldn't expect such | ||
| 819 | a counter to exist on its own, but we could derive one called say | ||
| 820 | 'wakeup_latency' and use it to extract a useful view of that metric | ||
| 821 | from trace data. Likewise, we really can't figure out from standard | ||
| 822 | profiling tools how much data every process on the system reads and | ||
| 823 | writes, along with how many of those reads and writes fail | ||
| 824 | completely. If we have sufficient trace data, however, we could | ||
| 825 | with the right tools easily extract and present that information, | ||
| 826 | but we'd need something other than pre-canned profiling tools to | ||
| 827 | do that. | ||
| 828 | </para> | ||
| 829 | |||
| 830 | <para> | ||
| 831 | Luckily, there is a general-purpose way to handle such needs, | ||
| 832 | called 'programming languages'. Making programming languages | ||
| 833 | easily available to apply to such problems given the specific | ||
| 834 | format of data is called a 'programming language binding' for | ||
| 835 | that data and language. Perf supports two programming language | ||
| 836 | bindings, one for Python and one for Perl. | ||
| 837 | </para> | ||
| 838 | |||
| 839 | <informalexample> | ||
| 840 | <emphasis>Tying it Together:</emphasis> Language bindings for manipulating and | ||
| 841 | aggregating trace data are of course not a new | ||
| 842 | idea. One of the first projects to do this was IBM's DProbes | ||
| 843 | dpcc compiler, an ANSI C compiler which targeted a low-level | ||
| 844 | assembly language running on an in-kernel interpreter on the | ||
| 845 | target system. This is exactly analogous to what Sun's DTrace | ||
| 846 | did, except that DTrace invented its own language for the purpose. | ||
| 847 | SystemTap, heavily inspired by DTrace, also created its own | ||
| 848 | one-off language, but rather than running the product on an | ||
| 849 | in-kernel interpreter, created an elaborate compiler-based | ||
| 850 | machinery to translate its language into kernel modules written | ||
| 851 | in C. | ||
| 852 | </informalexample> | ||
| 853 | |||
| 854 | <para> | ||
| 855 | Now that we have the trace data in perf.data, we can use | ||
| 856 | 'perf script -g' to generate a skeleton script with handlers | ||
| 857 | for the read/write entry/exit events we recorded: | ||
| 858 | <literallayout class='monospaced'> | ||
| 859 | root@crownbay:~# perf script -g python | ||
| 860 | generated Python script: perf-script.py | ||
| 861 | </literallayout> | ||
| 862 | The skeleton script simply creates a python function for each | ||
| 863 | event type in the perf.data file. The body of each function simply | ||
| 864 | prints the event name along with its parameters. For example: | ||
| 865 | <literallayout class='monospaced'> | ||
| 866 | def net__netif_rx(event_name, context, common_cpu, | ||
| 867 | common_secs, common_nsecs, common_pid, common_comm, | ||
| 868 | skbaddr, len, name): | ||
| 869 | print_header(event_name, common_cpu, common_secs, common_nsecs, | ||
| 870 | common_pid, common_comm) | ||
| 871 | |||
| 872 | print "skbaddr=%u, len=%u, name=%s\n" % (skbaddr, len, name), | ||
| 873 | </literallayout> | ||
| 874 | We can run that script directly to print all of the events | ||
| 875 | contained in the perf.data file: | ||
| 876 | <literallayout class='monospaced'> | ||
| 877 | root@crownbay:~# perf script -s perf-script.py | ||
| 878 | |||
| 879 | in trace_begin | ||
| 880 | syscalls__sys_exit_read 0 11624.857082795 1262 perf nr=3, ret=0 | ||
| 881 | sched__sched_wakeup 0 11624.857193498 1262 perf comm=migration/0, pid=6, prio=0, success=1, target_cpu=0 | ||
| 882 | irq__softirq_raise 1 11624.858021635 1262 wget vec=TIMER | ||
| 883 | irq__softirq_entry 1 11624.858074075 1262 wget vec=TIMER | ||
| 884 | irq__softirq_exit 1 11624.858081389 1262 wget vec=TIMER | ||
| 885 | syscalls__sys_enter_read 1 11624.858166434 1262 wget nr=3, fd=3, buf=3213019456, count=512 | ||
| 886 | syscalls__sys_exit_read 1 11624.858177924 1262 wget nr=3, ret=512 | ||
| 887 | skb__kfree_skb 1 11624.858878188 1262 wget skbaddr=3945041280, location=3243922184, protocol=0 | ||
| 888 | skb__kfree_skb 1 11624.858945608 1262 wget skbaddr=3945037824, location=3243922184, protocol=0 | ||
| 889 | irq__softirq_raise 1 11624.859020942 1262 wget vec=TIMER | ||
| 890 | irq__softirq_entry 1 11624.859076935 1262 wget vec=TIMER | ||
| 891 | irq__softirq_exit 1 11624.859083469 1262 wget vec=TIMER | ||
| 892 | syscalls__sys_enter_read 1 11624.859167565 1262 wget nr=3, fd=3, buf=3077701632, count=1024 | ||
| 893 | syscalls__sys_exit_read 1 11624.859192533 1262 wget nr=3, ret=471 | ||
| 894 | syscalls__sys_enter_read 1 11624.859228072 1262 wget nr=3, fd=3, buf=3077701632, count=1024 | ||
| 895 | syscalls__sys_exit_read 1 11624.859233707 1262 wget nr=3, ret=0 | ||
| 896 | syscalls__sys_enter_read 1 11624.859573008 1262 wget nr=3, fd=3, buf=3213018496, count=512 | ||
| 897 | syscalls__sys_exit_read 1 11624.859584818 1262 wget nr=3, ret=512 | ||
| 898 | syscalls__sys_enter_read 1 11624.859864562 1262 wget nr=3, fd=3, buf=3077701632, count=1024 | ||
| 899 | syscalls__sys_exit_read 1 11624.859888770 1262 wget nr=3, ret=1024 | ||
| 900 | syscalls__sys_enter_read 1 11624.859935140 1262 wget nr=3, fd=3, buf=3077701632, count=1024 | ||
| 901 | syscalls__sys_exit_read 1 11624.859944032 1262 wget nr=3, ret=1024 | ||
| 902 | </literallayout> | ||
| 903 | That in itself isn't very useful; after all, we can accomplish | ||
| 904 | pretty much the same thing by simply running 'perf script' | ||
| 905 | without arguments in the same directory as the perf.data file. | ||
| 906 | </para> | ||
| 907 | |||
| 908 | <para> | ||
| 909 | We can however replace the print statements in the generated | ||
| 910 | function bodies with whatever we want, and thereby make it | ||
| 911 | infinitely more useful. | ||
| 912 | </para> | ||
| 913 | |||
| 914 | <para> | ||
| 915 | As a simple example, let's just replace the print statements in | ||
| 916 | the function bodies with a simple function that does nothing but | ||
| 917 | increment a per-event count. When the program is run against a | ||
| 918 | perf.data file, each time a particular event is encountered, | ||
| 919 | a tally is incremented for that event. For example: | ||
| 920 | <literallayout class='monospaced'> | ||
| 921 | def net__netif_rx(event_name, context, common_cpu, | ||
| 922 | common_secs, common_nsecs, common_pid, common_comm, | ||
| 923 | skbaddr, len, name): | ||
| 924 | inc_counts(event_name) | ||
| 925 | </literallayout> | ||
| 926 | Each event handler function in the generated code is modified | ||
| 927 | to do this. For convenience, we define a common function called | ||
| 928 | inc_counts() that each handler calls; inc_counts() simply tallies | ||
| 929 | a count for each event using the 'counts' hash, which is a | ||
| 930 | specialized hash that does Perl-like autovivification, a | ||
| 931 | capability that's extremely useful for the kinds of multi-level | ||
| 932 | aggregation commonly used in processing traces (see perf's | ||
| 933 | documentation on the Python language binding for details): | ||
| 934 | <literallayout class='monospaced'> | ||
| 935 | counts = autodict() | ||
| 936 | |||
| 937 | def inc_counts(event_name): | ||
| 938 | try: | ||
| 939 | counts[event_name] += 1 | ||
| 940 | except TypeError: | ||
| 941 | counts[event_name] = 1 | ||
| 942 | </literallayout> | ||
| 943 | Finally, at the end of the trace processing run, we want to | ||
| 944 | print the result of all the per-event tallies. For that, we | ||
| 945 | use the special 'trace_end()' function: | ||
| 946 | <literallayout class='monospaced'> | ||
| 947 | def trace_end(): | ||
| 948 | for event_name, count in counts.iteritems(): | ||
| 949 | print "%-40s %10s\n" % (event_name, count) | ||
| 950 | </literallayout> | ||
| 951 | The end result is a summary of all the events recorded in the | ||
| 952 | trace: | ||
| 953 | <literallayout class='monospaced'> | ||
| 954 | skb__skb_copy_datagram_iovec 13148 | ||
| 955 | irq__softirq_entry 4796 | ||
| 956 | irq__irq_handler_exit 3805 | ||
| 957 | irq__softirq_exit 4795 | ||
| 958 | syscalls__sys_enter_write 8990 | ||
| 959 | net__net_dev_xmit 652 | ||
| 960 | skb__kfree_skb 4047 | ||
| 961 | sched__sched_wakeup 1155 | ||
| 962 | irq__irq_handler_entry 3804 | ||
| 963 | irq__softirq_raise 4799 | ||
| 964 | net__net_dev_queue 652 | ||
| 965 | syscalls__sys_enter_read 17599 | ||
| 966 | net__netif_receive_skb 1743 | ||
| 967 | syscalls__sys_exit_read 17598 | ||
| 968 | net__netif_rx 2 | ||
| 969 | napi__napi_poll 1877 | ||
| 970 | syscalls__sys_exit_write 8990 | ||
| 971 | </literallayout> | ||
| 972 | Note that this is pretty much exactly the same information we get | ||
| 973 | from 'perf stat', which goes a little way to support the idea | ||
| 974 | mentioned previously that given the right kind of trace data, | ||
| 975 | higher-level profiling-type summaries can be derived from it. | ||
| 976 | </para> | ||
| 977 | |||
| 978 | <para> | ||
| 979 | For more information, see the documentation on using the | ||
| 980 | <ulink url='http://linux.die.net/man/1/perf-script-python'>'perf script' python binding</ulink>. | ||
| 981 | </para> | ||
| 982 | </section> | ||
| 983 | |||
| 984 | <section id='system-wide-tracing-and-profiling'> | ||
| 985 | <title>System-Wide Tracing and Profiling</title> | ||
| 986 | |||
| 987 | <para> | ||
| 988 | The examples so far have focused on tracing a particular program or | ||
| 989 | workload - in other words, every profiling run has specified the | ||
| 990 | program to profile on the command line, e.g. 'perf record wget ...'. | ||
| 991 | </para> | ||
| 992 | |||
| 993 | <para> | ||
| 994 | It's also possible, and more interesting in many cases, to run a | ||
| 995 | system-wide profile or trace while running the workload in a | ||
| 996 | separate shell. | ||
| 997 | </para> | ||
| 998 | |||
| 999 | <para> | ||
| 1000 | To do system-wide profiling or tracing, you typically use | ||
| 1001 | the -a flag to 'perf record'. | ||
| 1002 | </para> | ||
| 1003 | |||
| 1004 | <para> | ||
| 1005 | To demonstrate this, open up one window and start the profile | ||
| 1006 | using the -a flag (press Ctrl-C to stop tracing): | ||
| 1007 | <literallayout class='monospaced'> | ||
| 1008 | root@crownbay:~# perf record -g -a | ||
| 1009 | ^C[ perf record: Woken up 6 times to write data ] | ||
| 1010 | [ perf record: Captured and wrote 1.400 MB perf.data (~61172 samples) ] | ||
| 1011 | </literallayout> | ||
| 1012 | In another window, run the wget test: | ||
| 1013 | <literallayout class='monospaced'> | ||
| 1014 | root@crownbay:~# wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
| 1015 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
| 1016 | linux-2.6.19.2.tar.b 100% |*******************************| 41727k 0:00:00 ETA | ||
| 1017 | </literallayout> | ||
| 1018 | Here we see entries not only for our wget load, but for other | ||
| 1019 | processes running on the system as well: | ||
| 1020 | </para> | ||
| 1021 | |||
| 1022 | <para> | ||
| 1023 | <imagedata fileref="figures/perf-systemwide.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 1024 | </para> | ||
| 1025 | |||
| 1026 | <para> | ||
| 1027 | In the snapshot above, we can see callchains that originate in | ||
| 1028 | libc, and a callchain from Xorg that demonstrates that we're | ||
| 1029 | using a proprietary X driver in userspace (notice the presence | ||
| 1030 | of 'PVR' and some other unresolvable symbols in the expanded | ||
| 1031 | Xorg callchain). | ||
| 1032 | </para> | ||
| 1033 | |||
| 1034 | <para> | ||
| 1035 | Note also that we have both kernel and userspace entries in the | ||
| 1036 | above snapshot. We can also tell perf to focus on userspace by | ||
| 1037 | providing a modifier, in this case 'u', to the 'cycles' hardware | ||
| 1038 | counter when we record a profile: | ||
| 1039 | <literallayout class='monospaced'> | ||
| 1040 | root@crownbay:~# perf record -g -a -e cycles:u | ||
| 1041 | ^C[ perf record: Woken up 2 times to write data ] | ||
| 1042 | [ perf record: Captured and wrote 0.376 MB perf.data (~16443 samples) ] | ||
| 1043 | </literallayout> | ||
| 1044 | </para> | ||
| 1045 | |||
| 1046 | <para> | ||
| 1047 | <imagedata fileref="figures/perf-report-cycles-u.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 1048 | </para> | ||
| 1049 | |||
| 1050 | <para> | ||
| 1051 | Notice that in the screenshot above, we see only userspace entries ([.]). | ||
| 1052 | </para> | ||
| 1053 | |||
| 1054 | <para> | ||
| 1055 | Finally, we can press 'enter' on a leaf node and select the 'Zoom | ||
| 1056 | into DSO' menu item to show only entries associated with a | ||
| 1057 | specific DSO. In the screenshot below, we've zoomed into the | ||
| 1058 | 'libc' DSO which shows all the entries associated with the | ||
| 1059 | libc-xxx.so DSO. | ||
| 1060 | </para> | ||
| 1061 | |||
| 1062 | <para> | ||
| 1063 | <imagedata fileref="figures/perf-systemwide-libc.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 1064 | </para> | ||
| 1065 | |||
| 1066 | <para> | ||
| 1067 | We can also use the system-wide -a switch to do system-wide | ||
| 1068 | tracing. Here we'll trace a couple of scheduler events: | ||
| 1069 | <literallayout class='monospaced'> | ||
| 1070 | root@crownbay:~# perf record -a -e sched:sched_switch -e sched:sched_wakeup | ||
| 1071 | ^C[ perf record: Woken up 38 times to write data ] | ||
| 1072 | [ perf record: Captured and wrote 9.780 MB perf.data (~427299 samples) ] | ||
| 1073 | </literallayout> | ||
| 1074 | We can look at the raw output using 'perf script' with no | ||
| 1075 | arguments: | ||
| 1076 | <literallayout class='monospaced'> | ||
| 1077 | root@crownbay:~# perf script | ||
| 1078 | |||
| 1079 | perf 1383 [001] 6171.460045: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
| 1080 | perf 1383 [001] 6171.460066: sched_switch: prev_comm=perf prev_pid=1383 prev_prio=120 prev_state=R+ ==> next_comm=kworker/1:1 next_pid=21 next_prio=120 | ||
| 1081 | kworker/1:1 21 [001] 6171.460093: sched_switch: prev_comm=kworker/1:1 prev_pid=21 prev_prio=120 prev_state=S ==> next_comm=perf next_pid=1383 next_prio=120 | ||
| 1082 | swapper 0 [000] 6171.468063: sched_wakeup: comm=kworker/0:3 pid=1209 prio=120 success=1 target_cpu=000 | ||
| 1083 | swapper 0 [000] 6171.468107: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/0:3 next_pid=1209 next_prio=120 | ||
| 1084 | kworker/0:3 1209 [000] 6171.468143: sched_switch: prev_comm=kworker/0:3 prev_pid=1209 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120 | ||
| 1085 | perf 1383 [001] 6171.470039: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
| 1086 | perf 1383 [001] 6171.470058: sched_switch: prev_comm=perf prev_pid=1383 prev_prio=120 prev_state=R+ ==> next_comm=kworker/1:1 next_pid=21 next_prio=120 | ||
| 1087 | kworker/1:1 21 [001] 6171.470082: sched_switch: prev_comm=kworker/1:1 prev_pid=21 prev_prio=120 prev_state=S ==> next_comm=perf next_pid=1383 next_prio=120 | ||
| 1088 | perf 1383 [001] 6171.480035: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
| 1089 | </literallayout> | ||
| 1090 | </para> | ||
| 1091 | |||
| 1092 | <section id='perf-filtering'> | ||
| 1093 | <title>Filtering</title> | ||
| 1094 | |||
| 1095 | <para> | ||
| 1096 | Notice that there are a lot of events that don't really have | ||
| 1097 | anything to do with what we're interested in, namely events | ||
| 1098 | that schedule 'perf' itself in and out or that wake perf up. | ||
| 1099 | We can get rid of those by using the '--filter' option - | ||
| 1100 | for each event we specify using -e, we can add a --filter | ||
| 1101 | after that to filter out trace events that contain fields | ||
| 1102 | with specific values: | ||
| 1103 | <literallayout class='monospaced'> | ||
| 1104 | root@crownbay:~# perf record -a -e sched:sched_switch --filter 'next_comm != perf && prev_comm != perf' -e sched:sched_wakeup --filter 'comm != perf' | ||
| 1105 | ^C[ perf record: Woken up 38 times to write data ] | ||
| 1106 | [ perf record: Captured and wrote 9.688 MB perf.data (~423279 samples) ] | ||
| 1107 | |||
| 1108 | |||
| 1109 | root@crownbay:~# perf script | ||
| 1110 | |||
| 1111 | swapper 0 [000] 7932.162180: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/0:3 next_pid=1209 next_prio=120 | ||
| 1112 | kworker/0:3 1209 [000] 7932.162236: sched_switch: prev_comm=kworker/0:3 prev_pid=1209 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120 | ||
| 1113 | perf 1407 [001] 7932.170048: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
| 1114 | perf 1407 [001] 7932.180044: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
| 1115 | perf 1407 [001] 7932.190038: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
| 1116 | perf 1407 [001] 7932.200044: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
| 1117 | perf 1407 [001] 7932.210044: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
| 1118 | perf 1407 [001] 7932.220044: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
| 1119 | swapper 0 [001] 7932.230111: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
| 1120 | swapper 0 [001] 7932.230146: sched_switch: prev_comm=swapper/1 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/1:1 next_pid=21 next_prio=120 | ||
| 1121 | kworker/1:1 21 [001] 7932.230205: sched_switch: prev_comm=kworker/1:1 prev_pid=21 prev_prio=120 prev_state=S ==> next_comm=swapper/1 next_pid=0 next_prio=120 | ||
| 1122 | swapper 0 [000] 7932.326109: sched_wakeup: comm=kworker/0:3 pid=1209 prio=120 success=1 target_cpu=000 | ||
| 1123 | swapper 0 [000] 7932.326171: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/0:3 next_pid=1209 next_prio=120 | ||
| 1124 | kworker/0:3 1209 [000] 7932.326214: sched_switch: prev_comm=kworker/0:3 prev_pid=1209 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120 | ||
| 1125 | </literallayout> | ||
| 1126 | In this case, we've filtered out all events that have 'perf' | ||
| 1127 | in their 'comm' or 'prev_comm' or 'next_comm' fields. Notice | ||
| 1128 | that there are still events recorded for perf, but also | ||
| 1129 | that those events don't have values of 'perf' for the filtered | ||
| 1130 | fields. To completely filter out anything from perf will | ||
| 1131 | require a bit more work, but for the purpose of demonstrating | ||
| 1132 | how to use filters, it's close enough. | ||
| 1133 | </para> | ||
| 1134 | |||
| 1135 | <informalexample> | ||
| 1136 | <emphasis>Tying it Together:</emphasis> These are exactly the same set of event | ||
| 1137 | filters defined by the trace event subsystem. See the | ||
| 1138 | ftrace/tracecmd/kernelshark section for more discussion about | ||
| 1139 | these event filters. | ||
| 1140 | </informalexample> | ||
| 1141 | |||
| 1142 | <informalexample> | ||
| 1143 | <emphasis>Tying it Together:</emphasis> These event filters are implemented by a | ||
| 1144 | special-purpose pseudo-interpreter in the kernel and are an | ||
| 1145 | integral and indispensable part of the perf design as it | ||
| 1146 | relates to tracing. Kernel-based event filters provide a | ||
| 1147 | mechanism to precisely throttle the event stream that appears | ||
| 1148 | in user space, where it makes sense to provide bindings to real | ||
| 1149 | programming languages for postprocessing the event stream. | ||
| 1150 | This architecture allows for the intelligent and flexible | ||
| 1151 | partitioning of processing between the kernel and user space. | ||
| 1152 | Contrast this with other tools such as SystemTap, which does | ||
| 1153 | all of its processing in the kernel and as such requires a | ||
| 1154 | special project-defined language in order to accommodate that | ||
| 1155 | design, or LTTng, where everything is sent to userspace and | ||
| 1156 | as such requires a super-efficient kernel-to-userspace | ||
| 1157 | transport mechanism in order to function properly. While | ||
| 1158 | perf certainly can benefit from, for instance, advances in | ||
| 1159 | the design of the transport, it doesn't fundamentally depend | ||
| 1160 | on them. Basically, if you find that your perf tracing | ||
| 1161 | application is causing buffer I/O overruns, it probably | ||
| 1162 | means that you aren't taking enough advantage of the | ||
| 1163 | kernel filtering engine. | ||
| 1164 | </informalexample> | ||
| 1165 | </section> | ||
| 1166 | </section> | ||
| 1167 | |||
| 1168 | <section id='using-dynamic-tracepoints'> | ||
| 1169 | <title>Using Dynamic Tracepoints</title> | ||
| 1170 | |||
| 1171 | <para> | ||
| 1172 | perf isn't restricted to the fixed set of static tracepoints | ||
| 1173 | listed by 'perf list'. Users can also add their own 'dynamic' | ||
| 1174 | tracepoints anywhere in the kernel. For instance, suppose we | ||
| 1175 | want to define our own tracepoint on do_fork(). We can do that | ||
| 1176 | using the 'perf probe' perf subcommand: | ||
| 1177 | <literallayout class='monospaced'> | ||
| 1178 | root@crownbay:~# perf probe do_fork | ||
| 1179 | Added new event: | ||
| 1180 | probe:do_fork (on do_fork) | ||
| 1181 | |||
| 1182 | You can now use it in all perf tools, such as: | ||
| 1183 | |||
| 1184 | perf record -e probe:do_fork -aR sleep 1 | ||
| 1185 | </literallayout> | ||
| 1186 | Adding a new tracepoint via 'perf probe' results in an event | ||
| 1187 | with all the expected files and format in | ||
| 1188 | /sys/kernel/debug/tracing/events, just the same as for static | ||
| 1189 | tracepoints (as discussed in more detail in the trace events | ||
| 1190 | subsystem section): | ||
| 1191 | <literallayout class='monospaced'> | ||
| 1192 | root@crownbay:/sys/kernel/debug/tracing/events/probe/do_fork# ls -al | ||
| 1193 | drwxr-xr-x 2 root root 0 Oct 28 11:42 . | ||
| 1194 | drwxr-xr-x 3 root root 0 Oct 28 11:42 .. | ||
| 1195 | -rw-r--r-- 1 root root 0 Oct 28 11:42 enable | ||
| 1196 | -rw-r--r-- 1 root root 0 Oct 28 11:42 filter | ||
| 1197 | -r--r--r-- 1 root root 0 Oct 28 11:42 format | ||
| 1198 | -r--r--r-- 1 root root 0 Oct 28 11:42 id | ||
| 1199 | |||
| 1200 | root@crownbay:/sys/kernel/debug/tracing/events/probe/do_fork# cat format | ||
| 1201 | name: do_fork | ||
| 1202 | ID: 944 | ||
| 1203 | format: | ||
| 1204 | field:unsigned short common_type; offset:0; size:2; signed:0; | ||
| 1205 | field:unsigned char common_flags; offset:2; size:1; signed:0; | ||
| 1206 | field:unsigned char common_preempt_count; offset:3; size:1; signed:0; | ||
| 1207 | field:int common_pid; offset:4; size:4; signed:1; | ||
| 1208 | field:int common_padding; offset:8; size:4; signed:1; | ||
| 1209 | |||
| 1210 | field:unsigned long __probe_ip; offset:12; size:4; signed:0; | ||
| 1211 | |||
| 1212 | print fmt: "(%lx)", REC->__probe_ip | ||
| 1213 | </literallayout> | ||
| 1214 | We can list all dynamic tracepoints currently in existence: | ||
| 1215 | <literallayout class='monospaced'> | ||
| 1216 | root@crownbay:~# perf probe -l | ||
| 1217 | probe:do_fork (on do_fork) | ||
| 1218 | probe:schedule (on schedule) | ||
| 1219 | </literallayout> | ||
| 1220 | Let's record system-wide ('sleep 30' is a trick for recording | ||
| 1221 | system-wide while basically doing nothing and then waking up | ||
| 1222 | after 30 seconds): | ||
| 1223 | <literallayout class='monospaced'> | ||
| 1224 | root@crownbay:~# perf record -g -a -e probe:do_fork sleep 30 | ||
| 1225 | [ perf record: Woken up 1 times to write data ] | ||
| 1226 | [ perf record: Captured and wrote 0.087 MB perf.data (~3812 samples) ] | ||
| 1227 | </literallayout> | ||
| 1228 | Using 'perf script' we can see each do_fork event that fired: | ||
| 1229 | <literallayout class='monospaced'> | ||
| 1230 | root@crownbay:~# perf script | ||
| 1231 | |||
| 1232 | # ======== | ||
| 1233 | # captured on: Sun Oct 28 11:55:18 2012 | ||
| 1234 | # hostname : crownbay | ||
| 1235 | # os release : 3.4.11-yocto-standard | ||
| 1236 | # perf version : 3.4.11 | ||
| 1237 | # arch : i686 | ||
| 1238 | # nrcpus online : 2 | ||
| 1239 | # nrcpus avail : 2 | ||
| 1240 | # cpudesc : Intel(R) Atom(TM) CPU E660 @ 1.30GHz | ||
| 1241 | # cpuid : GenuineIntel,6,38,1 | ||
| 1242 | # total memory : 1017184 kB | ||
| 1243 | # cmdline : /usr/bin/perf record -g -a -e probe:do_fork sleep 30 | ||
| 1244 | # event : name = probe:do_fork, type = 2, config = 0x3b0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern | ||
| 1245 | = 0, id = { 5, 6 } | ||
| 1246 | # HEADER_CPU_TOPOLOGY info available, use -I to display | ||
| 1247 | # ======== | ||
| 1248 | # | ||
| 1249 | matchbox-deskto 1197 [001] 34211.378318: do_fork: (c1028460) | ||
| 1250 | matchbox-deskto 1295 [001] 34211.380388: do_fork: (c1028460) | ||
| 1251 | pcmanfm 1296 [000] 34211.632350: do_fork: (c1028460) | ||
| 1252 | pcmanfm 1296 [000] 34211.639917: do_fork: (c1028460) | ||
| 1253 | matchbox-deskto 1197 [001] 34217.541603: do_fork: (c1028460) | ||
| 1254 | matchbox-deskto 1299 [001] 34217.543584: do_fork: (c1028460) | ||
| 1255 | gthumb 1300 [001] 34217.697451: do_fork: (c1028460) | ||
| 1256 | gthumb 1300 [001] 34219.085734: do_fork: (c1028460) | ||
| 1257 | gthumb 1300 [000] 34219.121351: do_fork: (c1028460) | ||
| 1258 | gthumb 1300 [001] 34219.264551: do_fork: (c1028460) | ||
| 1259 | pcmanfm 1296 [000] 34219.590380: do_fork: (c1028460) | ||
| 1260 | matchbox-deskto 1197 [001] 34224.955965: do_fork: (c1028460) | ||
| 1261 | matchbox-deskto 1306 [001] 34224.957972: do_fork: (c1028460) | ||
| 1262 | matchbox-termin 1307 [000] 34225.038214: do_fork: (c1028460) | ||
| 1263 | matchbox-termin 1307 [001] 34225.044218: do_fork: (c1028460) | ||
| 1264 | matchbox-termin 1307 [000] 34225.046442: do_fork: (c1028460) | ||
| 1265 | matchbox-deskto 1197 [001] 34237.112138: do_fork: (c1028460) | ||
| 1266 | matchbox-deskto 1311 [001] 34237.114106: do_fork: (c1028460) | ||
| 1267 | gaku 1312 [000] 34237.202388: do_fork: (c1028460) | ||
| 1268 | </literallayout> | ||
| 1269 | And using 'perf report' on the same file, we can see the | ||
| 1270 | callgraphs from starting a few programs during those 30 seconds: | ||
| 1271 | </para> | ||
| 1272 | |||
| 1273 | <para> | ||
| 1274 | <imagedata fileref="figures/perf-probe-do_fork-profile.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 1275 | </para> | ||
| 1276 | |||
| 1277 | <informalexample> | ||
| 1278 | <emphasis>Tying it Together:</emphasis> The trace events subsystem accommodates static | ||
| 1279 | and dynamic tracepoints in exactly the same way - there's no | ||
| 1280 | difference as far as the infrastructure is concerned. See the | ||
| 1281 | ftrace section for more details on the trace event subsystem. | ||
| 1282 | </informalexample> | ||
| 1283 | |||
| 1284 | <informalexample> | ||
| 1285 | <emphasis>Tying it Together:</emphasis> Dynamic tracepoints are implemented under the | ||
| 1286 | covers by kprobes and uprobes. kprobes and uprobes are also used | ||
| 1287 | by and in fact are the main focus of SystemTap. | ||
| 1288 | </informalexample> | ||
| 1289 | </section> | ||
| 1290 | </section> | ||
| 1291 | |||
| 1292 | <section id='perf-documentation'> | ||
| 1293 | <title>Documentation</title> | ||
| 1294 | |||
| 1295 | <para> | ||
| 1296 | Online versions of the man pages for the commands discussed in this | ||
| 1297 | section can be found here: | ||
| 1298 | <itemizedlist> | ||
| 1299 | <listitem><para>The <ulink url='http://linux.die.net/man/1/perf-stat'>'perf stat' manpage</ulink>. | ||
| 1300 | </para></listitem> | ||
| 1301 | <listitem><para>The <ulink url='http://linux.die.net/man/1/perf-record'>'perf record' manpage</ulink>. | ||
| 1302 | </para></listitem> | ||
| 1303 | <listitem><para>The <ulink url='http://linux.die.net/man/1/perf-report'>'perf report' manpage</ulink>. | ||
| 1304 | </para></listitem> | ||
| 1305 | <listitem><para>The <ulink url='http://linux.die.net/man/1/perf-probe'>'perf probe' manpage</ulink>. | ||
| 1306 | </para></listitem> | ||
| 1307 | <listitem><para>The <ulink url='http://linux.die.net/man/1/perf-script'>'perf script' manpage</ulink>. | ||
| 1308 | </para></listitem> | ||
| 1309 | <listitem><para>Documentation on using the | ||
| 1310 | <ulink url='http://linux.die.net/man/1/perf-script-python'>'perf script' python binding</ulink>. | ||
| 1311 | </para></listitem> | ||
| 1312 | <listitem><para>The top-level | ||
| 1313 | <ulink url='http://linux.die.net/man/1/perf'>perf(1) manpage</ulink>. | ||
| 1314 | </para></listitem> | ||
| 1315 | </itemizedlist> | ||
| 1316 | </para> | ||
| 1317 | |||
| 1318 | <para> | ||
| 1319 | Normally, you should be able to invoke the man pages via perf | ||
| 1320 | itself e.g. 'perf help' or 'perf help record'. | ||
| 1321 | </para> | ||
| 1322 | |||
| 1323 | <para> | ||
| 1324 | However, by default Yocto doesn't install man pages, but perf | ||
| 1325 | invokes the man pages for most help functionality. This is a known | ||
| 1326 | issue and is being tracked by a Yocto bug: | ||
| 1327 | <ulink url='https://bugzilla.yoctoproject.org/show_bug.cgi?id=3388'>Bug 3388 - perf: enable man pages for basic 'help' functionality</ulink>. | ||
| 1328 | </para> | ||
| 1329 | |||
| 1330 | <para> | ||
| 1331 | The man pages in text form, along with some other files, such as | ||
| 1332 | a set of examples, can be found in the 'perf' directory of the | ||
| 1333 | kernel tree: | ||
| 1334 | <literallayout class='monospaced'> | ||
| 1335 | tools/perf/Documentation | ||
| 1336 | </literallayout> | ||
| 1337 | There's also a nice perf tutorial on the perf wiki that goes | ||
| 1338 | into more detail than we do here in certain areas: | ||
| 1339 | <ulink url='https://perf.wiki.kernel.org/index.php/Tutorial'>Perf Tutorial</ulink> | ||
| 1340 | </para> | ||
| 1341 | </section> | ||
| 1342 | </section> | ||
| 1343 | |||
| 1344 | <section id='profile-manual-ftrace'> | ||
| 1345 | <title>ftrace</title> | ||
| 1346 | |||
| 1347 | <para> | ||
| 1348 | 'ftrace' literally refers to the 'ftrace function tracer' but in | ||
| 1349 | reality this encompasses a number of related tracers along with | ||
| 1350 | the infrastructure that they all make use of. | ||
| 1351 | </para> | ||
| 1352 | |||
| 1353 | <section id='ftrace-setup'> | ||
| 1354 | <title>Setup</title> | ||
| 1355 | |||
| 1356 | <para> | ||
| 1357 | For this section, we'll assume you've already performed the basic | ||
| 1358 | setup outlined in the General Setup section. | ||
| 1359 | </para> | ||
| 1360 | |||
| 1361 | <para> | ||
| 1362 | ftrace, trace-cmd, and kernelshark run on the target system, | ||
| 1363 | and are ready to go out-of-the-box - no additional setup is | ||
| 1364 | necessary. For the rest of this section we assume you've ssh'ed | ||
| 1365 | to the target and will be running ftrace there. kernelshark | ||
| 1366 | is a GUI application and if you use the '-X' option to ssh you | ||
| 1367 | can have the kernelshark GUI run on the target but display | ||
| 1368 | remotely on the host if you want. | ||
| 1369 | </para> | ||
| 1370 | </section> | ||
| 1371 | |||
| 1372 | <section id='basic-ftrace-usage'> | ||
| 1373 | <title>Basic ftrace usage</title> | ||
| 1374 | |||
| 1375 | <para> | ||
| 1376 | 'ftrace' essentially refers to everything included in | ||
| 1377 | the /tracing directory of the mounted debugfs filesystem | ||
| 1378 | (Yocto follows the standard convention and mounts it | ||
| 1379 | at /sys/kernel/debug). Here's a listing of all the files | ||
| 1380 | found in /sys/kernel/debug/tracing on a Yocto system: | ||
| 1381 | <literallayout class='monospaced'> | ||
| 1382 | root@sugarbay:/sys/kernel/debug/tracing# ls | ||
| 1383 | README kprobe_events trace | ||
| 1384 | available_events kprobe_profile trace_clock | ||
| 1385 | available_filter_functions options trace_marker | ||
| 1386 | available_tracers per_cpu trace_options | ||
| 1387 | buffer_size_kb printk_formats trace_pipe | ||
| 1388 | buffer_total_size_kb saved_cmdlines tracing_cpumask | ||
| 1389 | current_tracer set_event tracing_enabled | ||
| 1390 | dyn_ftrace_total_info set_ftrace_filter tracing_on | ||
| 1391 | enabled_functions set_ftrace_notrace tracing_thresh | ||
| 1392 | events set_ftrace_pid | ||
| 1393 | free_buffer set_graph_function | ||
| 1394 | </literallayout> | ||
| 1395 | The files listed above are used for various purposes - | ||
| 1396 | some relate directly to the tracers themselves, others are | ||
| 1397 | used to set tracing options, and yet others actually contain | ||
| 1398 | the tracing output when a tracer is in effect. Some of the | ||
| 1399 | functions can be guessed from their names, others need | ||
| 1400 | explanation; in any case, we'll cover some of the files we | ||
| 1401 | see here below but for an explanation of the others, please | ||
| 1402 | see the ftrace documentation. | ||
| 1403 | </para> | ||
| 1404 | |||
| 1405 | <para> | ||
| 1406 | We'll start by looking at some of the available built-in | ||
| 1407 | tracers. | ||
| 1408 | </para> | ||
| 1409 | |||
| 1410 | <para> | ||
| 1411 | cat'ing the 'available_tracers' file lists the set of | ||
| 1412 | available tracers: | ||
| 1413 | <literallayout class='monospaced'> | ||
| 1414 | root@sugarbay:/sys/kernel/debug/tracing# cat available_tracers | ||
| 1415 | blk function_graph function nop | ||
| 1416 | </literallayout> | ||
| 1417 | The 'current_tracer' file contains the tracer currently in | ||
| 1418 | effect: | ||
| 1419 | <literallayout class='monospaced'> | ||
| 1420 | root@sugarbay:/sys/kernel/debug/tracing# cat current_tracer | ||
| 1421 | nop | ||
| 1422 | </literallayout> | ||
| 1423 | The above listing of current_tracer shows that | ||
| 1424 | the 'nop' tracer is in effect, which is just another | ||
| 1425 | way of saying that there's actually no tracer | ||
| 1426 | currently in effect. | ||
| 1427 | </para> | ||
| 1428 | |||
| 1429 | <para> | ||
| 1430 | echo'ing one of the available_tracers into current_tracer | ||
| 1431 | makes the specified tracer the current tracer: | ||
| 1432 | <literallayout class='monospaced'> | ||
| 1433 | root@sugarbay:/sys/kernel/debug/tracing# echo function > current_tracer | ||
| 1434 | root@sugarbay:/sys/kernel/debug/tracing# cat current_tracer | ||
| 1435 | function | ||
| 1436 | </literallayout> | ||
| 1437 | The above sets the current tracer to be the | ||
| 1438 | 'function tracer'. This tracer traces every function | ||
| 1439 | call in the kernel and makes it available as the | ||
| 1440 | contents of the 'trace' file. Reading the 'trace' file | ||
| 1441 | lists the currently buffered function calls that have been | ||
| 1442 | traced by the function tracer: | ||
| 1443 | <literallayout class='monospaced'> | ||
| 1444 | root@sugarbay:/sys/kernel/debug/tracing# cat trace | less | ||
| 1445 | |||
| 1446 | # tracer: function | ||
| 1447 | # | ||
| 1448 | # entries-in-buffer/entries-written: 310629/766471 #P:8 | ||
| 1449 | # | ||
| 1450 | # _-----=> irqs-off | ||
| 1451 | # / _----=> need-resched | ||
| 1452 | # | / _---=> hardirq/softirq | ||
| 1453 | # || / _--=> preempt-depth | ||
| 1454 | # ||| / delay | ||
| 1455 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
| 1456 | # | | | |||| | | | ||
| 1457 | <idle>-0 [004] d..1 470.867169: ktime_get_real <-intel_idle | ||
| 1458 | <idle>-0 [004] d..1 470.867170: getnstimeofday <-ktime_get_real | ||
| 1459 | <idle>-0 [004] d..1 470.867171: ns_to_timeval <-intel_idle | ||
| 1460 | <idle>-0 [004] d..1 470.867171: ns_to_timespec <-ns_to_timeval | ||
| 1461 | <idle>-0 [004] d..1 470.867172: smp_apic_timer_interrupt <-apic_timer_interrupt | ||
| 1462 | <idle>-0 [004] d..1 470.867172: native_apic_mem_write <-smp_apic_timer_interrupt | ||
| 1463 | <idle>-0 [004] d..1 470.867172: irq_enter <-smp_apic_timer_interrupt | ||
| 1464 | <idle>-0 [004] d..1 470.867172: rcu_irq_enter <-irq_enter | ||
| 1465 | <idle>-0 [004] d..1 470.867173: rcu_idle_exit_common.isra.33 <-rcu_irq_enter | ||
| 1466 | <idle>-0 [004] d..1 470.867173: local_bh_disable <-irq_enter | ||
| 1467 | <idle>-0 [004] d..1 470.867173: add_preempt_count <-local_bh_disable | ||
| 1468 | <idle>-0 [004] d.s1 470.867174: tick_check_idle <-irq_enter | ||
| 1469 | <idle>-0 [004] d.s1 470.867174: tick_check_oneshot_broadcast <-tick_check_idle | ||
| 1470 | <idle>-0 [004] d.s1 470.867174: ktime_get <-tick_check_idle | ||
| 1471 | <idle>-0 [004] d.s1 470.867174: tick_nohz_stop_idle <-tick_check_idle | ||
| 1472 | <idle>-0 [004] d.s1 470.867175: update_ts_time_stats <-tick_nohz_stop_idle | ||
| 1473 | <idle>-0 [004] d.s1 470.867175: nr_iowait_cpu <-update_ts_time_stats | ||
| 1474 | <idle>-0 [004] d.s1 470.867175: tick_do_update_jiffies64 <-tick_check_idle | ||
| 1475 | <idle>-0 [004] d.s1 470.867175: _raw_spin_lock <-tick_do_update_jiffies64 | ||
| 1476 | <idle>-0 [004] d.s1 470.867176: add_preempt_count <-_raw_spin_lock | ||
| 1477 | <idle>-0 [004] d.s2 470.867176: do_timer <-tick_do_update_jiffies64 | ||
| 1478 | <idle>-0 [004] d.s2 470.867176: _raw_spin_lock <-do_timer | ||
| 1479 | <idle>-0 [004] d.s2 470.867176: add_preempt_count <-_raw_spin_lock | ||
| 1480 | <idle>-0 [004] d.s3 470.867177: ntp_tick_length <-do_timer | ||
| 1481 | <idle>-0 [004] d.s3 470.867177: _raw_spin_lock_irqsave <-ntp_tick_length | ||
| 1482 | . | ||
| 1483 | . | ||
| 1484 | . | ||
| 1485 | </literallayout> | ||
| 1486 | Each line in the trace above shows what was happening in | ||
| 1487 | the kernel on a given cpu, to the level of detail of | ||
| 1488 | function calls. Each entry shows the function called, | ||
| 1489 | followed by its caller (after the arrow). | ||
| 1490 | </para> | ||
| 1491 | |||
| 1492 | <para> | ||
| 1493 | The function tracer gives you an extremely detailed idea | ||
| 1494 | of what the kernel was doing at the point in time the trace | ||
| 1495 | was taken, and is a great way to learn about how the kernel | ||
| 1496 | code works in a dynamic sense. | ||
| 1497 | </para> | ||
| 1498 | |||
| 1499 | <informalexample> | ||
| 1500 | <emphasis>Tying it Together:</emphasis> The ftrace function tracer is also | ||
| 1501 | available from within perf, as the ftrace:function tracepoint. | ||
| 1502 | </informalexample> | ||
| 1503 | |||
| 1504 | <para> | ||
| 1505 | It is a little more difficult to follow the call chains than | ||
| 1506 | it needs to be - luckily there's a variant of the function | ||
| 1507 | tracer that displays the callchains explicitly, called the | ||
| 1508 | 'function_graph' tracer: | ||
| 1509 | <literallayout class='monospaced'> | ||
| 1510 | root@sugarbay:/sys/kernel/debug/tracing# echo function_graph > current_tracer | ||
| 1511 | root@sugarbay:/sys/kernel/debug/tracing# cat trace | less | ||
| 1512 | |||
| 1513 | tracer: function_graph | ||
| 1514 | |||
| 1515 | CPU DURATION FUNCTION CALLS | ||
| 1516 | | | | | | | | | ||
| 1517 | 7) 0.046 us | pick_next_task_fair(); | ||
| 1518 | 7) 0.043 us | pick_next_task_stop(); | ||
| 1519 | 7) 0.042 us | pick_next_task_rt(); | ||
| 1520 | 7) 0.032 us | pick_next_task_fair(); | ||
| 1521 | 7) 0.030 us | pick_next_task_idle(); | ||
| 1522 | 7) | _raw_spin_unlock_irq() { | ||
| 1523 | 7) 0.033 us | sub_preempt_count(); | ||
| 1524 | 7) 0.258 us | } | ||
| 1525 | 7) 0.032 us | sub_preempt_count(); | ||
| 1526 | 7) + 13.341 us | } /* __schedule */ | ||
| 1527 | 7) 0.095 us | } /* sub_preempt_count */ | ||
| 1528 | 7) | schedule() { | ||
| 1529 | 7) | __schedule() { | ||
| 1530 | 7) 0.060 us | add_preempt_count(); | ||
| 1531 | 7) 0.044 us | rcu_note_context_switch(); | ||
| 1532 | 7) | _raw_spin_lock_irq() { | ||
| 1533 | 7) 0.033 us | add_preempt_count(); | ||
| 1534 | 7) 0.247 us | } | ||
| 1535 | 7) | idle_balance() { | ||
| 1536 | 7) | _raw_spin_unlock() { | ||
| 1537 | 7) 0.031 us | sub_preempt_count(); | ||
| 1538 | 7) 0.246 us | } | ||
| 1539 | 7) | update_shares() { | ||
| 1540 | 7) 0.030 us | __rcu_read_lock(); | ||
| 1541 | 7) 0.029 us | __rcu_read_unlock(); | ||
| 1542 | 7) 0.484 us | } | ||
| 1543 | 7) 0.030 us | __rcu_read_lock(); | ||
| 1544 | 7) | load_balance() { | ||
| 1545 | 7) | find_busiest_group() { | ||
| 1546 | 7) 0.031 us | idle_cpu(); | ||
| 1547 | 7) 0.029 us | idle_cpu(); | ||
| 1548 | 7) 0.035 us | idle_cpu(); | ||
| 1549 | 7) 0.906 us | } | ||
| 1550 | 7) 1.141 us | } | ||
| 1551 | 7) 0.022 us | msecs_to_jiffies(); | ||
| 1552 | 7) | load_balance() { | ||
| 1553 | 7) | find_busiest_group() { | ||
| 1554 | 7) 0.031 us | idle_cpu(); | ||
| 1555 | . | ||
| 1556 | . | ||
| 1557 | . | ||
| 1558 | 4) 0.062 us | msecs_to_jiffies(); | ||
| 1559 | 4) 0.062 us | __rcu_read_unlock(); | ||
| 1560 | 4) | _raw_spin_lock() { | ||
| 1561 | 4) 0.073 us | add_preempt_count(); | ||
| 1562 | 4) 0.562 us | } | ||
| 1563 | 4) + 17.452 us | } | ||
| 1564 | 4) 0.108 us | put_prev_task_fair(); | ||
| 1565 | 4) 0.102 us | pick_next_task_fair(); | ||
| 1566 | 4) 0.084 us | pick_next_task_stop(); | ||
| 1567 | 4) 0.075 us | pick_next_task_rt(); | ||
| 1568 | 4) 0.062 us | pick_next_task_fair(); | ||
| 1569 | 4) 0.066 us | pick_next_task_idle(); | ||
| 1570 | ------------------------------------------ | ||
| 1571 | 4) kworker-74 => <idle>-0 | ||
| 1572 | ------------------------------------------ | ||
| 1573 | |||
| 1574 | 4) | finish_task_switch() { | ||
| 1575 | 4) | _raw_spin_unlock_irq() { | ||
| 1576 | 4) 0.100 us | sub_preempt_count(); | ||
| 1577 | 4) 0.582 us | } | ||
| 1578 | 4) 1.105 us | } | ||
| 1579 | 4) 0.088 us | sub_preempt_count(); | ||
| 1580 | 4) ! 100.066 us | } | ||
| 1581 | . | ||
| 1582 | . | ||
| 1583 | . | ||
| 1584 | 3) | sys_ioctl() { | ||
| 1585 | 3) 0.083 us | fget_light(); | ||
| 1586 | 3) | security_file_ioctl() { | ||
| 1587 | 3) 0.066 us | cap_file_ioctl(); | ||
| 1588 | 3) 0.562 us | } | ||
| 1589 | 3) | do_vfs_ioctl() { | ||
| 1590 | 3) | drm_ioctl() { | ||
| 1591 | 3) 0.075 us | drm_ut_debug_printk(); | ||
| 1592 | 3) | i915_gem_pwrite_ioctl() { | ||
| 1593 | 3) | i915_mutex_lock_interruptible() { | ||
| 1594 | 3) 0.070 us | mutex_lock_interruptible(); | ||
| 1595 | 3) 0.570 us | } | ||
| 1596 | 3) | drm_gem_object_lookup() { | ||
| 1597 | 3) | _raw_spin_lock() { | ||
| 1598 | 3) 0.080 us | add_preempt_count(); | ||
| 1599 | 3) 0.620 us | } | ||
| 1600 | 3) | _raw_spin_unlock() { | ||
| 1601 | 3) 0.085 us | sub_preempt_count(); | ||
| 1602 | 3) 0.562 us | } | ||
| 1603 | 3) 2.149 us | } | ||
| 1604 | 3) 0.133 us | i915_gem_object_pin(); | ||
| 1605 | 3) | i915_gem_object_set_to_gtt_domain() { | ||
| 1606 | 3) 0.065 us | i915_gem_object_flush_gpu_write_domain(); | ||
| 1607 | 3) 0.065 us | i915_gem_object_wait_rendering(); | ||
| 1608 | 3) 0.062 us | i915_gem_object_flush_cpu_write_domain(); | ||
| 1609 | 3) 1.612 us | } | ||
| 1610 | 3) | i915_gem_object_put_fence() { | ||
| 1611 | 3) 0.097 us | i915_gem_object_flush_fence.constprop.36(); | ||
| 1612 | 3) 0.645 us | } | ||
| 1613 | 3) 0.070 us | add_preempt_count(); | ||
| 1614 | 3) 0.070 us | sub_preempt_count(); | ||
| 1615 | 3) 0.073 us | i915_gem_object_unpin(); | ||
| 1616 | 3) 0.068 us | mutex_unlock(); | ||
| 1617 | 3) 9.924 us | } | ||
| 1618 | 3) + 11.236 us | } | ||
| 1619 | 3) + 11.770 us | } | ||
| 1620 | 3) + 13.784 us | } | ||
| 1621 | 3) | sys_ioctl() { | ||
| 1622 | </literallayout> | ||
| 1623 | As you can see, the function_graph display is much easier to | ||
| 1624 | follow. Also note that in addition to the function calls and | ||
| 1625 | associated braces, other events such as scheduler events | ||
| 1626 | are displayed in context. In fact, you can freely include | ||
| 1627 | any tracepoint available in the trace events subsystem described | ||
| 1628 | in the next section by simply enabling those events, and they'll | ||
| 1629 | appear in context in the function graph display. Quite a | ||
| 1630 | powerful tool for understanding kernel dynamics. | ||
| 1631 | </para> | ||
| 1632 | |||
| 1633 | <para> | ||
| 1634 | Also notice that there are various annotations on the left | ||
| 1635 | hand side of the display. For example if the total time it | ||
| 1636 | took for a given function to execute is above a certain | ||
| 1637 | threshold, an exclamation point or plus sign appears on the | ||
| 1638 | left hand side. Please see the ftrace documentation for | ||
| 1639 | details on all these fields. | ||
| 1640 | </para> | ||
| 1641 | </section> | ||
| 1642 | |||
| 1643 | <section id='the-trace-events-subsystem'> | ||
| 1644 | <title>The 'trace events' Subsystem</title> | ||
| 1645 | |||
| 1646 | <para> | ||
| 1647 | One especially important directory contained within | ||
| 1648 | the /sys/kernel/debug/tracing directory is the 'events' | ||
| 1649 | subdirectory, which contains representations of every | ||
| 1650 | tracepoint in the system. Listing out the contents of | ||
| 1651 | the 'events' subdirectory, we see mainly another set of | ||
| 1652 | subdirectories: | ||
| 1653 | <literallayout class='monospaced'> | ||
| 1654 | root@sugarbay:/sys/kernel/debug/tracing# cd events | ||
| 1655 | root@sugarbay:/sys/kernel/debug/tracing/events# ls -al | ||
| 1656 | drwxr-xr-x 38 root root 0 Nov 14 23:19 . | ||
| 1657 | drwxr-xr-x 5 root root 0 Nov 14 23:19 .. | ||
| 1658 | drwxr-xr-x 19 root root 0 Nov 14 23:19 block | ||
| 1659 | drwxr-xr-x 32 root root 0 Nov 14 23:19 btrfs | ||
| 1660 | drwxr-xr-x 5 root root 0 Nov 14 23:19 drm | ||
| 1661 | -rw-r--r-- 1 root root 0 Nov 14 23:19 enable | ||
| 1662 | drwxr-xr-x 40 root root 0 Nov 14 23:19 ext3 | ||
| 1663 | drwxr-xr-x 79 root root 0 Nov 14 23:19 ext4 | ||
| 1664 | drwxr-xr-x 14 root root 0 Nov 14 23:19 ftrace | ||
| 1665 | drwxr-xr-x 8 root root 0 Nov 14 23:19 hda | ||
| 1666 | -r--r--r-- 1 root root 0 Nov 14 23:19 header_event | ||
| 1667 | -r--r--r-- 1 root root 0 Nov 14 23:19 header_page | ||
| 1668 | drwxr-xr-x 25 root root 0 Nov 14 23:19 i915 | ||
| 1669 | drwxr-xr-x 7 root root 0 Nov 14 23:19 irq | ||
| 1670 | drwxr-xr-x 12 root root 0 Nov 14 23:19 jbd | ||
| 1671 | drwxr-xr-x 14 root root 0 Nov 14 23:19 jbd2 | ||
| 1672 | drwxr-xr-x 14 root root 0 Nov 14 23:19 kmem | ||
| 1673 | drwxr-xr-x 7 root root 0 Nov 14 23:19 module | ||
| 1674 | drwxr-xr-x 3 root root 0 Nov 14 23:19 napi | ||
| 1675 | drwxr-xr-x 6 root root 0 Nov 14 23:19 net | ||
| 1676 | drwxr-xr-x 3 root root 0 Nov 14 23:19 oom | ||
| 1677 | drwxr-xr-x 12 root root 0 Nov 14 23:19 power | ||
| 1678 | drwxr-xr-x 3 root root 0 Nov 14 23:19 printk | ||
| 1679 | drwxr-xr-x 8 root root 0 Nov 14 23:19 random | ||
| 1680 | drwxr-xr-x 4 root root 0 Nov 14 23:19 raw_syscalls | ||
| 1681 | drwxr-xr-x 3 root root 0 Nov 14 23:19 rcu | ||
| 1682 | drwxr-xr-x 6 root root 0 Nov 14 23:19 rpm | ||
| 1683 | drwxr-xr-x 20 root root 0 Nov 14 23:19 sched | ||
| 1684 | drwxr-xr-x 7 root root 0 Nov 14 23:19 scsi | ||
| 1685 | drwxr-xr-x 4 root root 0 Nov 14 23:19 signal | ||
| 1686 | drwxr-xr-x 5 root root 0 Nov 14 23:19 skb | ||
| 1687 | drwxr-xr-x 4 root root 0 Nov 14 23:19 sock | ||
| 1688 | drwxr-xr-x 10 root root 0 Nov 14 23:19 sunrpc | ||
| 1689 | drwxr-xr-x 538 root root 0 Nov 14 23:19 syscalls | ||
| 1690 | drwxr-xr-x 4 root root 0 Nov 14 23:19 task | ||
| 1691 | drwxr-xr-x 14 root root 0 Nov 14 23:19 timer | ||
| 1692 | drwxr-xr-x 3 root root 0 Nov 14 23:19 udp | ||
| 1693 | drwxr-xr-x 21 root root 0 Nov 14 23:19 vmscan | ||
| 1694 | drwxr-xr-x 3 root root 0 Nov 14 23:19 vsyscall | ||
| 1695 | drwxr-xr-x 6 root root 0 Nov 14 23:19 workqueue | ||
| 1696 | drwxr-xr-x 26 root root 0 Nov 14 23:19 writeback | ||
| 1697 | </literallayout> | ||
| 1698 | Each one of these subdirectories corresponds to a | ||
| 1699 | 'subsystem' and contains yet again more subdirectories, | ||
| 1700 | each one of those finally corresponding to a tracepoint. | ||
| 1701 | For example, here are the contents of the 'kmem' subsystem: | ||
| 1702 | <literallayout class='monospaced'> | ||
| 1703 | root@sugarbay:/sys/kernel/debug/tracing/events# cd kmem | ||
| 1704 | root@sugarbay:/sys/kernel/debug/tracing/events/kmem# ls -al | ||
| 1705 | drwxr-xr-x 14 root root 0 Nov 14 23:19 . | ||
| 1706 | drwxr-xr-x 38 root root 0 Nov 14 23:19 .. | ||
| 1707 | -rw-r--r-- 1 root root 0 Nov 14 23:19 enable | ||
| 1708 | -rw-r--r-- 1 root root 0 Nov 14 23:19 filter | ||
| 1709 | drwxr-xr-x 2 root root 0 Nov 14 23:19 kfree | ||
| 1710 | drwxr-xr-x 2 root root 0 Nov 14 23:19 kmalloc | ||
| 1711 | drwxr-xr-x 2 root root 0 Nov 14 23:19 kmalloc_node | ||
| 1712 | drwxr-xr-x 2 root root 0 Nov 14 23:19 kmem_cache_alloc | ||
| 1713 | drwxr-xr-x 2 root root 0 Nov 14 23:19 kmem_cache_alloc_node | ||
| 1714 | drwxr-xr-x 2 root root 0 Nov 14 23:19 kmem_cache_free | ||
| 1715 | drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_alloc | ||
| 1716 | drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_alloc_extfrag | ||
| 1717 | drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_alloc_zone_locked | ||
| 1718 | drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_free | ||
| 1719 | drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_free_batched | ||
| 1720 | drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_pcpu_drain | ||
| 1721 | </literallayout> | ||
| 1722 | Let's see what's inside the subdirectory for a specific | ||
| 1723 | tracepoint, in this case the one for kmalloc: | ||
| 1724 | <literallayout class='monospaced'> | ||
| 1725 | root@sugarbay:/sys/kernel/debug/tracing/events/kmem# cd kmalloc | ||
| 1726 | root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# ls -al | ||
| 1727 | drwxr-xr-x 2 root root 0 Nov 14 23:19 . | ||
| 1728 | drwxr-xr-x 14 root root 0 Nov 14 23:19 .. | ||
| 1729 | -rw-r--r-- 1 root root 0 Nov 14 23:19 enable | ||
| 1730 | -rw-r--r-- 1 root root 0 Nov 14 23:19 filter | ||
| 1731 | -r--r--r-- 1 root root 0 Nov 14 23:19 format | ||
| 1732 | -r--r--r-- 1 root root 0 Nov 14 23:19 id | ||
| 1733 | </literallayout> | ||
| 1734 | The 'format' file for the tracepoint describes the event | ||
| 1735 | in memory, which is used by the various tracing tools | ||
| 1736 | that now make use of these tracepoints to parse the event | ||
| 1737 | and make sense of it, along with a 'print fmt' field that | ||
| 1738 | allows tools like ftrace to display the event as text. | ||
| 1739 | Here's what the format of the kmalloc event looks like: | ||
| 1740 | <literallayout class='monospaced'> | ||
| 1741 | root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# cat format | ||
| 1742 | name: kmalloc | ||
| 1743 | ID: 313 | ||
| 1744 | format: | ||
| 1745 | field:unsigned short common_type; offset:0; size:2; signed:0; | ||
| 1746 | field:unsigned char common_flags; offset:2; size:1; signed:0; | ||
| 1747 | field:unsigned char common_preempt_count; offset:3; size:1; signed:0; | ||
| 1748 | field:int common_pid; offset:4; size:4; signed:1; | ||
| 1749 | field:int common_padding; offset:8; size:4; signed:1; | ||
| 1750 | |||
| 1751 | field:unsigned long call_site; offset:16; size:8; signed:0; | ||
| 1752 | field:const void * ptr; offset:24; size:8; signed:0; | ||
| 1753 | field:size_t bytes_req; offset:32; size:8; signed:0; | ||
| 1754 | field:size_t bytes_alloc; offset:40; size:8; signed:0; | ||
| 1755 | field:gfp_t gfp_flags; offset:48; size:4; signed:0; | ||
| 1756 | |||
| 1757 | print fmt: "call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", REC->call_site, REC->ptr, REC->bytes_req, REC->bytes_alloc, | ||
| 1758 | (REC->gfp_flags) ? __print_flags(REC->gfp_flags, "|", {(unsigned long)(((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( | ||
| 1759 | gfp_t)0x20000u) | (( gfp_t)0x02u) | (( gfp_t)0x08u)) | (( gfp_t)0x4000u) | (( gfp_t)0x10000u) | (( gfp_t)0x1000u) | (( gfp_t)0x200u) | (( | ||
| 1760 | gfp_t)0x400000u)), "GFP_TRANSHUGE"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( gfp_t)0x20000u) | (( | ||
| 1761 | gfp_t)0x02u) | (( gfp_t)0x08u)), "GFP_HIGHUSER_MOVABLE"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( | ||
| 1762 | gfp_t)0x20000u) | (( gfp_t)0x02u)), "GFP_HIGHUSER"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( | ||
| 1763 | gfp_t)0x20000u)), "GFP_USER"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( gfp_t)0x80000u)), "GFP_TEMPORARY"}, | ||
| 1764 | {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u)), "GFP_KERNEL"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u)), | ||
| 1765 | "GFP_NOFS"}, {(unsigned long)((( gfp_t)0x20u)), "GFP_ATOMIC"}, {(unsigned long)((( gfp_t)0x10u)), "GFP_NOIO"}, {(unsigned long)(( | ||
| 1766 | gfp_t)0x20u), "GFP_HIGH"}, {(unsigned long)(( gfp_t)0x10u), "GFP_WAIT"}, {(unsigned long)(( gfp_t)0x40u), "GFP_IO"}, {(unsigned long)(( | ||
| 1767 | gfp_t)0x100u), "GFP_COLD"}, {(unsigned long)(( gfp_t)0x200u), "GFP_NOWARN"}, {(unsigned long)(( gfp_t)0x400u), "GFP_REPEAT"}, {(unsigned | ||
| 1768 | long)(( gfp_t)0x800u), "GFP_NOFAIL"}, {(unsigned long)(( gfp_t)0x1000u), "GFP_NORETRY"}, {(unsigned long)(( gfp_t)0x4000u), "GFP_COMP"}, | ||
| 1769 | {(unsigned long)(( gfp_t)0x8000u), "GFP_ZERO"}, {(unsigned long)(( gfp_t)0x10000u), "GFP_NOMEMALLOC"}, {(unsigned long)(( gfp_t)0x20000u), | ||
| 1770 | "GFP_HARDWALL"}, {(unsigned long)(( gfp_t)0x40000u), "GFP_THISNODE"}, {(unsigned long)(( gfp_t)0x80000u), "GFP_RECLAIMABLE"}, {(unsigned | ||
| 1771 | long)(( gfp_t)0x08u), "GFP_MOVABLE"}, {(unsigned long)(( gfp_t)0), "GFP_NOTRACK"}, {(unsigned long)(( gfp_t)0x400000u), "GFP_NO_KSWAPD"}, | ||
| 1772 | {(unsigned long)(( gfp_t)0x800000u), "GFP_OTHER_NODE"} ) : "GFP_NOWAIT" | ||
| 1773 | </literallayout> | ||
| 1774 | The 'enable' file in the tracepoint directory is what allows | ||
| 1775 | the user (or tools such as trace-cmd) to actually turn the | ||
| 1776 | tracepoint on and off. When enabled, the corresponding | ||
| 1777 | tracepoint will start appearing in the ftrace 'trace' | ||
| 1778 | file described previously. For example, this turns on the | ||
| 1779 | kmalloc tracepoint: | ||
| 1780 | <literallayout class='monospaced'> | ||
| 1781 | root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# echo 1 > enable | ||
| 1782 | </literallayout> | ||
| 1783 | At the moment, we're not interested in the function tracer or | ||
| 1784 | some other tracer that might be in effect, so we first turn | ||
| 1785 | it off, but if we do that, we still need to turn tracing on in | ||
| 1786 | order to see the events in the output buffer: | ||
| 1787 | <literallayout class='monospaced'> | ||
| 1788 | root@sugarbay:/sys/kernel/debug/tracing# echo nop > current_tracer | ||
| 1789 | root@sugarbay:/sys/kernel/debug/tracing# echo 1 > tracing_on | ||
| 1790 | </literallayout> | ||
| 1791 | Now, if we look at the 'trace' file, we see nothing | ||
| 1792 | but the kmalloc events we just turned on: | ||
| 1793 | <literallayout class='monospaced'> | ||
| 1794 | root@sugarbay:/sys/kernel/debug/tracing# cat trace | less | ||
| 1795 | # tracer: nop | ||
| 1796 | # | ||
| 1797 | # entries-in-buffer/entries-written: 1897/1897 #P:8 | ||
| 1798 | # | ||
| 1799 | # _-----=> irqs-off | ||
| 1800 | # / _----=> need-resched | ||
| 1801 | # | / _---=> hardirq/softirq | ||
| 1802 | # || / _--=> preempt-depth | ||
| 1803 | # ||| / delay | ||
| 1804 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
| 1805 | # | | | |||| | | | ||
| 1806 | dropbear-1465 [000] ...1 18154.620753: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL | ||
| 1807 | <idle>-0 [000] ..s3 18154.621640: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
| 1808 | <idle>-0 [000] ..s3 18154.621656: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
| 1809 | matchbox-termin-1361 [001] ...1 18154.755472: kmalloc: call_site=ffffffff81614050 ptr=ffff88006d5f0e00 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_KERNEL|GFP_REPEAT | ||
| 1810 | Xorg-1264 [002] ...1 18154.755581: kmalloc: call_site=ffffffff8141abe8 ptr=ffff8800734f4cc0 bytes_req=168 bytes_alloc=192 gfp_flags=GFP_KERNEL|GFP_NOWARN|GFP_NORETRY | ||
| 1811 | Xorg-1264 [002] ...1 18154.755583: kmalloc: call_site=ffffffff814192a3 ptr=ffff88001f822520 bytes_req=24 bytes_alloc=32 gfp_flags=GFP_KERNEL|GFP_ZERO | ||
| 1812 | Xorg-1264 [002] ...1 18154.755589: kmalloc: call_site=ffffffff81419edb ptr=ffff8800721a2f00 bytes_req=64 bytes_alloc=64 gfp_flags=GFP_KERNEL|GFP_ZERO | ||
| 1813 | matchbox-termin-1361 [001] ...1 18155.354594: kmalloc: call_site=ffffffff81614050 ptr=ffff88006db35400 bytes_req=576 bytes_alloc=1024 gfp_flags=GFP_KERNEL|GFP_REPEAT | ||
| 1814 | Xorg-1264 [002] ...1 18155.354703: kmalloc: call_site=ffffffff8141abe8 ptr=ffff8800734f4cc0 bytes_req=168 bytes_alloc=192 gfp_flags=GFP_KERNEL|GFP_NOWARN|GFP_NORETRY | ||
| 1815 | Xorg-1264 [002] ...1 18155.354705: kmalloc: call_site=ffffffff814192a3 ptr=ffff88001f822520 bytes_req=24 bytes_alloc=32 gfp_flags=GFP_KERNEL|GFP_ZERO | ||
| 1816 | Xorg-1264 [002] ...1 18155.354711: kmalloc: call_site=ffffffff81419edb ptr=ffff8800721a2f00 bytes_req=64 bytes_alloc=64 gfp_flags=GFP_KERNEL|GFP_ZERO | ||
| 1817 | <idle>-0 [000] ..s3 18155.673319: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
| 1818 | dropbear-1465 [000] ...1 18155.673525: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL | ||
| 1819 | <idle>-0 [000] ..s3 18155.674821: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
| 1820 | <idle>-0 [000] ..s3 18155.793014: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
| 1821 | dropbear-1465 [000] ...1 18155.793219: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL | ||
| 1822 | <idle>-0 [000] ..s3 18155.794147: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
| 1823 | <idle>-0 [000] ..s3 18155.936705: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
| 1824 | dropbear-1465 [000] ...1 18155.936910: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL | ||
| 1825 | <idle>-0 [000] ..s3 18155.937869: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
| 1826 | matchbox-termin-1361 [001] ...1 18155.953667: kmalloc: call_site=ffffffff81614050 ptr=ffff88006d5f2000 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_KERNEL|GFP_REPEAT | ||
| 1827 | Xorg-1264 [002] ...1 18155.953775: kmalloc: call_site=ffffffff8141abe8 ptr=ffff8800734f4cc0 bytes_req=168 bytes_alloc=192 gfp_flags=GFP_KERNEL|GFP_NOWARN|GFP_NORETRY | ||
| 1828 | Xorg-1264 [002] ...1 18155.953777: kmalloc: call_site=ffffffff814192a3 ptr=ffff88001f822520 bytes_req=24 bytes_alloc=32 gfp_flags=GFP_KERNEL|GFP_ZERO | ||
| 1829 | Xorg-1264 [002] ...1 18155.953783: kmalloc: call_site=ffffffff81419edb ptr=ffff8800721a2f00 bytes_req=64 bytes_alloc=64 gfp_flags=GFP_KERNEL|GFP_ZERO | ||
| 1830 | <idle>-0 [000] ..s3 18156.176053: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
| 1831 | dropbear-1465 [000] ...1 18156.176257: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL | ||
| 1832 | <idle>-0 [000] ..s3 18156.177717: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
| 1833 | <idle>-0 [000] ..s3 18156.399229: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
| 1834 | dropbear-1465 [000] ...1 18156.399434: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL | ||
| 1835 | <idle>-0 [000] ..s3 18156.400660: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
| 1836 | matchbox-termin-1361 [001] ...1 18156.552800: kmalloc: call_site=ffffffff81614050 ptr=ffff88006db34800 bytes_req=576 bytes_alloc=1024 gfp_flags=GFP_KERNEL|GFP_REPEAT | ||
| 1837 | </literallayout> | ||
| 1838 | To again disable the kmalloc event, we need to send 0 to the | ||
| 1839 | enable file: | ||
| 1840 | <literallayout class='monospaced'> | ||
| 1841 | root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# echo 0 > enable | ||
| 1842 | </literallayout> | ||
| 1843 | You can enable any number of events or complete subsystems | ||
| 1844 | (by using the 'enable' file in the subsystem directory) and | ||
| 1845 | get an arbitrarily fine-grained idea of what's going on in the | ||
| 1846 | system by enabling as many of the appropriate tracepoints | ||
| 1847 | as applicable. | ||
| 1848 | </para> | ||
| 1849 | |||
| 1850 | <para> | ||
| 1851 | A number of the tools described in this HOWTO do just that, | ||
| 1852 | including trace-cmd and kernelshark in the next section. | ||
| 1853 | </para> | ||
| 1854 | |||
| 1855 | <informalexample> | ||
| 1856 | <emphasis>Tying it Together:</emphasis> These tracepoints and their representation | ||
| 1857 | are used not only by ftrace, but by many of the other tools | ||
| 1858 | covered in this document and they form a central point of | ||
| 1859 | integration for the various tracers available in Linux. | ||
| 1860 | They form a central part of the instrumentation for the | ||
| 1861 | following tools: perf, lttng, ftrace, blktrace and SystemTap. | ||
| 1862 | </informalexample> | ||
| 1863 | |||
| 1864 | <informalexample> | ||
| 1865 | <emphasis>Tying it Together:</emphasis> Eventually all the special-purpose tracers | ||
| 1866 | currently available in /sys/kernel/debug/tracing will be | ||
| 1867 | removed and replaced with equivalent tracers based on the | ||
| 1868 | 'trace events' subsystem. | ||
| 1869 | </informalexample> | ||
| 1870 | </section> | ||
| 1871 | |||
| 1872 | <section id='trace-cmd-kernelshark'> | ||
| 1873 | <title>trace-cmd/kernelshark</title> | ||
| 1874 | |||
| 1875 | <para> | ||
| 1876 | trace-cmd is essentially an extensive command-line 'wrapper' | ||
| 1877 | interface that hides the details of all the individual files | ||
| 1878 | in /sys/kernel/debug/tracing, allowing users to specify | ||
| 1879 | particular events within the | ||
| 1880 | /sys/kernel/debug/tracing/events/ subdirectory and to collect | ||
| 1881 | traces and avoid having to deal with those details directly. | ||
| 1882 | </para> | ||
| 1883 | |||
| 1884 | <para> | ||
| 1885 | As yet another layer on top of that, kernelshark provides a GUI | ||
| 1886 | that allows users to start and stop traces and specify sets | ||
| 1887 | of events using an intuitive interface, and view the | ||
| 1888 | output as both trace events and as a per-CPU graphical | ||
| 1889 | display. It directly uses 'trace-cmd' as the plumbing | ||
| 1890 | that accomplishes all that underneath the covers (and | ||
| 1891 | actually displays the trace-cmd command it uses, as we'll see). | ||
| 1892 | </para> | ||
| 1893 | |||
| 1894 | <para> | ||
| 1895 | To start a trace using kernelshark, first start kernelshark: | ||
| 1896 | <literallayout class='monospaced'> | ||
| 1897 | root@sugarbay:~# kernelshark | ||
| 1898 | </literallayout> | ||
| 1899 | Then bring up the 'Capture' dialog by choosing from the | ||
| 1900 | kernelshark menu: | ||
| 1901 | <literallayout class='monospaced'> | ||
| 1902 | Capture | Record | ||
| 1903 | </literallayout> | ||
| 1904 | That will display the following dialog, which allows you to | ||
| 1905 | choose one or more events (or even one or more complete | ||
| 1906 | subsystems) to trace: | ||
| 1907 | </para> | ||
| 1908 | |||
| 1909 | <para> | ||
| 1910 | <imagedata fileref="figures/kernelshark-choose-events.png" width="6in" depth="6in" align="center" scalefit="1" /> | ||
| 1911 | </para> | ||
| 1912 | |||
| 1913 | <para> | ||
| 1914 | Note that these are exactly the same sets of events described | ||
| 1915 | in the previous trace events subsystem section, and in fact | ||
| 1916 | that is where trace-cmd gets them for kernelshark. | ||
| 1917 | </para> | ||
| 1918 | |||
| 1919 | <para> | ||
| 1920 | In the above screenshot, we've decided to explore the | ||
| 1921 | graphics subsystem a bit and so have chosen to trace all | ||
| 1922 | the tracepoints contained within the 'i915' and 'drm' | ||
| 1923 | subsystems. | ||
| 1924 | </para> | ||
| 1925 | |||
| 1926 | <para> | ||
| 1927 | After doing that, we can start and stop the trace using | ||
| 1928 | the 'Run' and 'Stop' button on the lower right corner of | ||
| 1929 | the dialog (the same button will turn into the 'Stop' | ||
| 1930 | button after the trace has started): | ||
| 1931 | </para> | ||
| 1932 | |||
| 1933 | <para> | ||
| 1934 | <imagedata fileref="figures/kernelshark-output-display.png" width="6in" depth="6in" align="center" scalefit="1" /> | ||
| 1935 | </para> | ||
| 1936 | |||
| 1937 | <para> | ||
| 1938 | Notice that the right-hand pane shows the exact trace-cmd | ||
| 1939 | command-line that's used to run the trace, along with the | ||
| 1940 | results of the trace-cmd run. | ||
| 1941 | </para> | ||
| 1942 | |||
| 1943 | <para> | ||
| 1944 | Once the 'Stop' button is pressed, the graphical view magically | ||
| 1945 | fills up with a colorful per-cpu display of the trace data, | ||
| 1946 | along with the detailed event listing below that: | ||
| 1947 | </para> | ||
| 1948 | |||
| 1949 | <para> | ||
| 1950 | <imagedata fileref="figures/kernelshark-i915-display.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 1951 | </para> | ||
| 1952 | |||
| 1953 | <para> | ||
| 1954 | Here's another example, this time a display resulting | ||
| 1955 | from tracing 'all events': | ||
| 1956 | </para> | ||
| 1957 | |||
| 1958 | <para> | ||
| 1959 | <imagedata fileref="figures/kernelshark-all.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 1960 | </para> | ||
| 1961 | |||
| 1962 | <para> | ||
| 1963 | The tool is pretty self-explanatory, but for more detailed | ||
| 1964 | information on navigating through the data, see the | ||
| 1965 | <ulink url='http://rostedt.homelinux.com/kernelshark/'>kernelshark website</ulink>. | ||
| 1966 | </para> | ||
| 1967 | </section> | ||
| 1968 | |||
| 1969 | <section id='ftrace-documentation'> | ||
| 1970 | <title>Documentation</title> | ||
| 1971 | |||
| 1972 | <para> | ||
| 1973 | The documentation for ftrace can be found in the kernel | ||
| 1974 | Documentation directory: | ||
| 1975 | <literallayout class='monospaced'> | ||
| 1976 | Documentation/trace/ftrace.txt | ||
| 1977 | </literallayout> | ||
| 1978 | The documentation for the trace event subsystem can also | ||
| 1979 | be found in the kernel Documentation directory: | ||
| 1980 | <literallayout class='monospaced'> | ||
| 1981 | Documentation/trace/events.txt | ||
| 1982 | </literallayout> | ||
| 1983 | There is a nice series of articles on using | ||
| 1984 | ftrace and trace-cmd at LWN: | ||
| 1985 | <itemizedlist> | ||
| 1986 | <listitem><para><ulink url='http://lwn.net/Articles/365835/'>Debugging the kernel using Ftrace - part 1</ulink> | ||
| 1987 | </para></listitem> | ||
| 1988 | <listitem><para><ulink url='http://lwn.net/Articles/366796/'>Debugging the kernel using Ftrace - part 2</ulink> | ||
| 1989 | </para></listitem> | ||
| 1990 | <listitem><para><ulink url='http://lwn.net/Articles/370423/'>Secrets of the Ftrace function tracer</ulink> | ||
| 1991 | </para></listitem> | ||
| 1992 | <listitem><para><ulink url='https://lwn.net/Articles/410200/'>trace-cmd: A front-end for Ftrace</ulink> | ||
| 1993 | </para></listitem> | ||
| 1994 | </itemizedlist> | ||
| 1995 | </para> | ||
| 1996 | |||
| 1997 | <para> | ||
| 1998 | There's more detailed documentation on kernelshark usage here: | ||
| 1999 | <ulink url='http://rostedt.homelinux.com/kernelshark/'>KernelShark</ulink> | ||
| 2000 | </para> | ||
| 2001 | |||
| 2002 | <para> | ||
| 2003 | An amusing yet useful README (a tracing mini-HOWTO) can be | ||
| 2004 | found in /sys/kernel/debug/tracing/README. | ||
| 2005 | </para> | ||
| 2006 | </section> | ||
| 2007 | </section> | ||
| 2008 | |||
| 2009 | <section id='profile-manual-systemtap'> | ||
| 2010 | <title>systemtap</title> | ||
| 2011 | |||
| 2012 | <para> | ||
| 2013 | SystemTap is a system-wide script-based tracing and profiling tool. | ||
| 2014 | </para> | ||
| 2015 | |||
| 2016 | <para> | ||
| 2017 | SystemTap scripts are C-like programs that are executed in the | ||
| 2018 | kernel to gather/print/aggregate data extracted from the context | ||
| 2019 | they end up being invoked under. | ||
| 2020 | </para> | ||
| 2021 | |||
| 2022 | <para> | ||
| 2023 | For example, this probe from the | ||
| 2024 | <ulink url='http://sourceware.org/systemtap/tutorial/'>SystemTap tutorial</ulink> | ||
| 2025 | simply prints a line every time any process on the system open()s | ||
| 2026 | a file. For each line, it prints the executable name of the | ||
| 2027 | program that opened the file, along with its PID, and the name | ||
| 2028 | of the file it opened (or tried to open), which it extracts | ||
| 2029 | from the open syscall's argstr. | ||
| 2030 | <literallayout class='monospaced'> | ||
| 2031 | probe syscall.open | ||
| 2032 | { | ||
| 2033 | printf ("%s(%d) open (%s)\n", execname(), pid(), argstr) | ||
| 2034 | } | ||
| 2035 | |||
| 2036 | probe timer.ms(4000) # after 4 seconds | ||
| 2037 | { | ||
| 2038 | exit () | ||
| 2039 | } | ||
| 2040 | </literallayout> | ||
| 2041 | Normally, to execute this probe, you'd simply install | ||
| 2042 | systemtap on the system you want to probe, and directly run | ||
| 2043 | the probe on that system e.g. assuming the name of the file | ||
| 2044 | containing the above text is trace_open.stp: | ||
| 2045 | <literallayout class='monospaced'> | ||
| 2046 | # stap trace_open.stp | ||
| 2047 | </literallayout> | ||
| 2048 | What systemtap does under the covers to run this probe is 1) | ||
| 2049 | parse and convert the probe to an equivalent 'C' form, 2) | ||
| 2050 | compile the 'C' form into a kernel module, 3) insert the | ||
| 2051 | module into the kernel, which arms it, and 4) collect the data | ||
| 2052 | generated by the probe and display it to the user. | ||
| 2053 | </para> | ||
| 2054 | |||
| 2055 | <para> | ||
| 2056 | In order to accomplish steps 1 and 2, the 'stap' program needs | ||
| 2057 | access to the kernel build system that produced the kernel | ||
| 2058 | that the probed system is running. In the case of a typical | ||
| 2059 | embedded system (the 'target'), the kernel build system | ||
| 2060 | unfortunately isn't typically part of the image running on | ||
| 2061 | the target. It is normally available on the 'host' system | ||
| 2062 | that produced the target image however; in such cases, | ||
| 2063 | steps 1 and 2 are executed on the host system, and steps | ||
| 2064 | 3 and 4 are executed on the target system, using only the | ||
| 2065 | systemtap 'runtime'. | ||
| 2066 | </para> | ||
| 2067 | |||
| 2068 | <para> | ||
| 2069 | The systemtap support in Yocto assumes that only steps | ||
| 2070 | 3 and 4 are run on the target; it is possible to do | ||
| 2071 | everything on the target, but this section assumes only | ||
| 2072 | the typical embedded use-case. | ||
| 2073 | </para> | ||
| 2074 | |||
| 2075 | <para> | ||
| 2076 | So basically what you need to do in order to run a systemtap | ||
| 2077 | script on the target is to 1) on the host system, compile the | ||
| 2078 | probe into a kernel module that makes sense to the target, 2) | ||
| 2079 | copy the module onto the target system and 3) insert the | ||
| 2080 | module into the target kernel, which arms it, and 4) collect | ||
| 2081 | the data generated by the probe and display it to the user. | ||
| 2082 | </para> | ||
| 2083 | |||
| 2084 | <section id='systemtap-setup'> | ||
| 2085 | <title>Setup</title> | ||
| 2086 | |||
| 2087 | <para> | ||
| 2088 | Those are a lot of steps and a lot of details, but | ||
| 2089 | fortunately Yocto includes a script called 'crosstap' | ||
| 2090 | that will take care of those details, allowing you to | ||
| 2091 | simply execute a systemtap script on the remote target, | ||
| 2092 | with arguments if necessary. | ||
| 2093 | </para> | ||
| 2094 | |||
| 2095 | <para> | ||
| 2096 | In order to do this from a remote host, however, you | ||
| 2097 | need to have access to the build for the image you | ||
| 2098 | booted. The 'crosstap' script provides details on how | ||
| 2099 | to do this if you run the script on the host without having | ||
| 2100 | done a build: | ||
| 2101 | <note> | ||
| 2102 | SystemTap, which uses 'crosstap', assumes you can establish an | ||
| 2103 | ssh connection to the remote target. | ||
| 2104 | Please refer to the crosstap wiki page for details on verifying | ||
| 2105 | ssh connections at | ||
| 2106 | <ulink url='https://wiki.yoctoproject.org/wiki/Tracing_and_Profiling#systemtap'></ulink>. | ||
| 2107 | Also, the ability to ssh into the target system is not enabled | ||
| 2108 | by default in *-minimal images. | ||
| 2109 | </note> | ||
| 2110 | <literallayout class='monospaced'> | ||
| 2111 | $ crosstap root@192.168.1.88 trace_open.stp | ||
| 2112 | |||
| 2113 | Error: No target kernel build found. | ||
| 2114 | Did you forget to create a local build of your image? | ||
| 2115 | |||
| 2116 | 'crosstap' requires a local sdk build of the target system | ||
| 2117 | (or a build that includes 'tools-profile') in order to build | ||
| 2118 | kernel modules that can probe the target system. | ||
| 2119 | |||
| 2120 | Practically speaking, that means you need to do the following: | ||
| 2121 | - If you're running a pre-built image, download the release | ||
| 2122 | and/or BSP tarballs used to build the image. | ||
| 2123 | - If you're working from git sources, just clone the metadata | ||
| 2124 | and BSP layers needed to build the image you'll be booting. | ||
| 2125 | - Make sure you're properly set up to build a new image (see | ||
| 2126 | the BSP README and/or the widely available basic documentation | ||
| 2127 | that discusses how to build images). | ||
| 2128 | - Build an -sdk version of the image e.g.: | ||
| 2129 | $ bitbake core-image-sato-sdk | ||
| 2130 | OR | ||
| 2131 | - Build a non-sdk image but include the profiling tools: | ||
| 2132 | [ edit local.conf and add 'tools-profile' to the end of | ||
| 2133 | the EXTRA_IMAGE_FEATURES variable ] | ||
| 2134 | $ bitbake core-image-sato | ||
| 2135 | |||
| 2136 | Once you've build the image on the host system, you're ready to | ||
| 2137 | boot it (or the equivalent pre-built image) and use 'crosstap' | ||
| 2138 | to probe it (you need to source the environment as usual first): | ||
| 2139 | |||
| 2140 | $ source oe-init-build-env | ||
| 2141 | $ cd ~/my/systemtap/scripts | ||
| 2142 | $ crosstap root@192.168.1.xxx myscript.stp | ||
| 2143 | </literallayout> | ||
| 2144 | So essentially what you need to do is build an SDK image or | ||
| 2145 | an image with 'tools-profile' as detailed in the | ||
| 2146 | "<link linkend='profile-manual-general-setup'>General Setup</link>" | ||
| 2147 | section of this manual, and boot the resulting target image. | ||
| 2148 | </para> | ||
| 2149 | |||
| 2150 | <note> | ||
| 2151 | If you have a build directory containing multiple machines, | ||
| 2152 | you need to have the MACHINE you're connecting to selected | ||
| 2153 | in local.conf, and the kernel in that machine's build | ||
| 2154 | directory must match the kernel on the booted system exactly, | ||
| 2155 | or you'll get the above 'crosstap' message when you try to | ||
| 2156 | invoke a script. | ||
| 2157 | </note> | ||
| 2158 | </section> | ||
| 2159 | |||
| 2160 | <section id='running-a-script-on-a-target'> | ||
| 2161 | <title>Running a Script on a Target</title> | ||
| 2162 | |||
| 2163 | <para> | ||
| 2164 | Once you've done that, you should be able to run a systemtap | ||
| 2165 | script on the target: | ||
| 2166 | <literallayout class='monospaced'> | ||
| 2167 | $ cd /path/to/yocto | ||
| 2168 | $ source oe-init-build-env | ||
| 2169 | |||
| 2170 | ### Shell environment set up for builds. ### | ||
| 2171 | |||
| 2172 | You can now run 'bitbake <target>' | ||
| 2173 | |||
| 2174 | Common targets are: | ||
| 2175 | core-image-minimal | ||
| 2176 | core-image-sato | ||
| 2177 | meta-toolchain | ||
| 2178 | adt-installer | ||
| 2179 | meta-ide-support | ||
| 2180 | |||
| 2181 | You can also run generated qemu images with a command like 'runqemu qemux86' | ||
| 2182 | </literallayout> | ||
| 2183 | Once you've done that, you can cd to whatever directory | ||
| 2184 | contains your scripts and use 'crosstap' to run the script: | ||
| 2185 | <literallayout class='monospaced'> | ||
| 2186 | $ cd /path/to/my/systemtap/script | ||
| 2187 | $ crosstap root@192.168.7.2 trace_open.stp | ||
| 2188 | </literallayout> | ||
| 2189 | If you get an error connecting to the target e.g.: | ||
| 2190 | <literallayout class='monospaced'> | ||
| 2191 | $ crosstap root@192.168.7.2 trace_open.stp | ||
| 2192 | error establishing ssh connection on remote 'root@192.168.7.2' | ||
| 2193 | </literallayout> | ||
| 2194 | Try ssh'ing to the target and see what happens: | ||
| 2195 | <literallayout class='monospaced'> | ||
| 2196 | $ ssh root@192.168.7.2 | ||
| 2197 | </literallayout> | ||
| 2198 | A lot of the time, connection problems are due to specifying a | ||
| 2199 | wrong IP address or having a 'host key verification error'. | ||
| 2200 | </para> | ||
| 2201 | |||
| 2202 | <para> | ||
| 2203 | If everything worked as planned, you should see something | ||
| 2204 | like this (enter the password when prompted, or press enter | ||
| 2205 | if it's set up to use no password): | ||
| 2206 | <literallayout class='monospaced'> | ||
| 2207 | $ crosstap root@192.168.7.2 trace_open.stp | ||
| 2208 | root@192.168.7.2's password: | ||
| 2209 | matchbox-termin(1036) open ("/tmp/vte3FS2LW", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE, 0600) | ||
| 2210 | matchbox-termin(1036) open ("/tmp/vteJMC7LW", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE, 0600) | ||
| 2211 | </literallayout> | ||
| 2212 | </para> | ||
| 2213 | </section> | ||
| 2214 | |||
| 2215 | <section id='systemtap-documentation'> | ||
| 2216 | <title>Documentation</title> | ||
| 2217 | |||
| 2218 | <para> | ||
| 2219 | The SystemTap language reference can be found here: | ||
| 2220 | <ulink url='http://sourceware.org/systemtap/langref/'>SystemTap Language Reference</ulink> | ||
| 2221 | </para> | ||
| 2222 | |||
| 2223 | <para> | ||
| 2224 | Links to other SystemTap documents, tutorials, and examples can be | ||
| 2225 | found here: | ||
| 2226 | <ulink url='http://sourceware.org/systemtap/documentation.html'>SystemTap documentation page</ulink> | ||
| 2227 | </para> | ||
| 2228 | </section> | ||
| 2229 | </section> | ||
| 2230 | |||
| 2231 | <section id='profile-manual-oprofile'> | ||
| 2232 | <title>oprofile</title> | ||
| 2233 | |||
| 2234 | <para> | ||
| 2235 | oprofile itself is a command-line application that runs on the | ||
| 2236 | target system. | ||
| 2237 | </para> | ||
| 2238 | |||
| 2239 | <section id='oprofile-setup'> | ||
| 2240 | <title>Setup</title> | ||
| 2241 | |||
| 2242 | <para> | ||
| 2243 | For this section, we'll assume you've already performed the | ||
| 2244 | basic setup outlined in the | ||
| 2245 | "<link linkend='profile-manual-general-setup'>General Setup</link>" | ||
| 2246 | section. | ||
| 2247 | </para> | ||
| 2248 | |||
| 2249 | <para> | ||
| 2250 | For the section that deals with running oprofile from the command-line, | ||
| 2251 | we assume you've ssh'ed from the host into the target and will be | ||
| 2252 | running oprofile on the target. | ||
| 2253 | </para> | ||
| 2254 | |||
| 2255 | <para> | ||
| 2256 | oprofileui (oprofile-viewer) is a GUI-based program that runs | ||
| 2257 | on the host and interacts remotely with the target. | ||
| 2258 | See the oprofileui section for the exact steps needed to | ||
| 2259 | install oprofileui on the host. | ||
| 2260 | </para> | ||
| 2261 | </section> | ||
| 2262 | |||
| 2263 | <section id='oprofile-basic-usage'> | ||
| 2264 | <title>Basic Usage</title> | ||
| 2265 | |||
| 2266 | <para> | ||
| 2267 | Oprofile as configured in Yocto is a system-wide profiler | ||
| 2268 | (i.e. the version in Yocto doesn't yet make use of the | ||
| 2269 | perf_events interface which would allow it to profile | ||
| 2270 | specific processes and workloads). It relies on hardware | ||
| 2271 | counter support (but can fall back to a | ||
| 2272 | timer-based mode), which means that it doesn't take | ||
| 2273 | advantage of tracepoints or other event sources for example. | ||
| 2274 | </para> | ||
| 2275 | |||
| 2276 | <para> | ||
| 2277 | It consists of a kernel module that collects samples and a | ||
| 2278 | userspace daemon that writes the sample data to disk. | ||
| 2279 | </para> | ||
| 2280 | |||
| 2281 | <para> | ||
| 2282 | The 'opcontrol' shell script is used for transparently | ||
| 2283 | managing these components and starting and stopping | ||
| 2284 | profiles, and the 'opreport' command is used to | ||
| 2285 | display the results. | ||
| 2286 | </para> | ||
| 2287 | |||
| 2288 | <para> | ||
| 2289 | The oprofile daemon should already be running, but before | ||
| 2290 | you start profiling, you may need to change some settings | ||
| 2291 | and some of these settings may require the daemon to not | ||
| 2292 | be running. One of these settings is the path to the | ||
| 2293 | vmlinux file, which you'll want to set using the --vmlinux | ||
| 2294 | option if you want the kernel profiled: | ||
| 2295 | <literallayout class='monospaced'> | ||
| 2296 | root@crownbay:~# opcontrol --vmlinux=/boot/vmlinux-`uname -r` | ||
| 2297 | The profiling daemon is currently active, so changes to the configuration | ||
| 2298 | will be used the next time you restart oprofile after a --shutdown or --deinit. | ||
| 2299 | </literallayout> | ||
| 2300 | You can check whether 'vmlinux file:' is set by using 'opcontrol --status': | ||
| 2301 | <literallayout class='monospaced'> | ||
| 2302 | root@crownbay:~# opcontrol --status | ||
| 2303 | Daemon paused: pid 1334 | ||
| 2304 | Separate options: library | ||
| 2305 | vmlinux file: none | ||
| 2306 | Image filter: none | ||
| 2307 | Call-graph depth: 6 | ||
| 2308 | </literallayout> | ||
| 2309 | If it's not, you need to shut down the daemon, add the setting | ||
| 2310 | and restart the daemon: | ||
| 2311 | <literallayout class='monospaced'> | ||
| 2312 | root@crownbay:~# opcontrol --shutdown | ||
| 2313 | Killing daemon. | ||
| 2314 | |||
| 2315 | root@crownbay:~# opcontrol --vmlinux=/boot/vmlinux-`uname -r` | ||
| 2316 | root@crownbay:~# opcontrol --start-daemon | ||
| 2317 | Using default event: CPU_CLK_UNHALTED:100000:0:1:1 | ||
| 2318 | Using 2.6+ OProfile kernel interface. | ||
| 2319 | Reading module info. | ||
| 2320 | Using log file /var/lib/oprofile/samples/oprofiled.log | ||
| 2321 | Daemon started. | ||
| 2322 | </literallayout> | ||
| 2323 | If we check the status again we now see our updated settings: | ||
| 2324 | <literallayout class='monospaced'> | ||
| 2325 | root@crownbay:~# opcontrol --status | ||
| 2326 | Daemon paused: pid 1649 | ||
| 2327 | Separate options: library | ||
| 2328 | vmlinux file: /boot/vmlinux-3.4.11-yocto-standard | ||
| 2329 | Image filter: none | ||
| 2330 | Call-graph depth: 6 | ||
| 2331 | </literallayout> | ||
| 2332 | We're now in a position to run a profile. For that we use | ||
| 2333 | 'opcontrol --start': | ||
| 2334 | <literallayout class='monospaced'> | ||
| 2335 | root@crownbay:~# opcontrol --start | ||
| 2336 | Profiler running. | ||
| 2337 | </literallayout> | ||
| 2338 | In another window, run our wget workload: | ||
| 2339 | <literallayout class='monospaced'> | ||
| 2340 | root@crownbay:~# rm linux-2.6.19.2.tar.bz2; wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink>; sync | ||
| 2341 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
| 2342 | linux-2.6.19.2.tar.b 100% |*******************************| 41727k 0:00:00 ETA | ||
| 2343 | </literallayout> | ||
| 2344 | To stop the profile we use 'opcontrol --shutdown', which not | ||
| 2345 | only stops the profile but shuts down the daemon as well: | ||
| 2346 | <literallayout class='monospaced'> | ||
| 2347 | root@crownbay:~# opcontrol --shutdown | ||
| 2348 | Stopping profiling. | ||
| 2349 | Killing daemon. | ||
| 2350 | </literallayout> | ||
| 2351 | Oprofile writes sample data to /var/lib/oprofile/samples, | ||
| 2352 | which you can look at if you're interested in seeing how the | ||
| 2353 | samples are structured. This is also interesting because | ||
| 2354 | it's related to how you dive down to get further details | ||
| 2355 | about specific executables in OProfile. | ||
| 2356 | </para> | ||
| 2357 | |||
| 2358 | <para> | ||
| 2359 | To see the default display output for a profile, simply type | ||
| 2360 | 'opreport', which will show the results using the data in | ||
| 2361 | /var/lib/oprofile/samples: | ||
| 2362 | <literallayout class='monospaced'> | ||
| 2363 | root@crownbay:~# opreport | ||
| 2364 | |||
| 2365 | WARNING! The OProfile kernel driver reports sample buffer overflows. | ||
| 2366 | Such overflows can result in incorrect sample attribution, invalid sample | ||
| 2367 | files and other symptoms. See the oprofiled.log for details. | ||
| 2368 | You should adjust your sampling frequency to eliminate (or at least minimize) | ||
| 2369 | these overflows. | ||
| 2370 | CPU: Intel Architectural Perfmon, speed 1.3e+06 MHz (estimated) | ||
| 2371 | Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (No unit mask) count 100000 | ||
| 2372 | CPU_CLK_UNHALT...| | ||
| 2373 | samples| %| | ||
| 2374 | ------------------ | ||
| 2375 | 464365 79.8156 vmlinux-3.4.11-yocto-standard | ||
| 2376 | 65108 11.1908 oprofiled | ||
| 2377 | CPU_CLK_UNHALT...| | ||
| 2378 | samples| %| | ||
| 2379 | ------------------ | ||
| 2380 | 64416 98.9372 oprofiled | ||
| 2381 | 692 1.0628 libc-2.16.so | ||
| 2382 | 36959 6.3526 no-vmlinux | ||
| 2383 | 4378 0.7525 busybox | ||
| 2384 | CPU_CLK_UNHALT...| | ||
| 2385 | samples| %| | ||
| 2386 | ------------------ | ||
| 2387 | 2844 64.9612 libc-2.16.so | ||
| 2388 | 1337 30.5391 busybox | ||
| 2389 | 193 4.4084 ld-2.16.so | ||
| 2390 | 2 0.0457 libnss_compat-2.16.so | ||
| 2391 | 1 0.0228 libnsl-2.16.so | ||
| 2392 | 1 0.0228 libnss_files-2.16.so | ||
| 2393 | 4344 0.7467 bash | ||
| 2394 | CPU_CLK_UNHALT...| | ||
| 2395 | samples| %| | ||
| 2396 | ------------------ | ||
| 2397 | 2657 61.1648 bash | ||
| 2398 | 1665 38.3287 libc-2.16.so | ||
| 2399 | 18 0.4144 ld-2.16.so | ||
| 2400 | 3 0.0691 libtinfo.so.5.9 | ||
| 2401 | 1 0.0230 libdl-2.16.so | ||
| 2402 | 3118 0.5359 nf_conntrack | ||
| 2403 | 686 0.1179 matchbox-terminal | ||
| 2404 | CPU_CLK_UNHALT...| | ||
| 2405 | samples| %| | ||
| 2406 | ------------------ | ||
| 2407 | 214 31.1953 libglib-2.0.so.0.3200.4 | ||
| 2408 | 114 16.6181 libc-2.16.so | ||
| 2409 | 79 11.5160 libcairo.so.2.11200.2 | ||
| 2410 | 78 11.3703 libgdk-x11-2.0.so.0.2400.8 | ||
| 2411 | 51 7.4344 libpthread-2.16.so | ||
| 2412 | 45 6.5598 libgobject-2.0.so.0.3200.4 | ||
| 2413 | 29 4.2274 libvte.so.9.2800.2 | ||
| 2414 | 25 3.6443 libX11.so.6.3.0 | ||
| 2415 | 19 2.7697 libxcb.so.1.1.0 | ||
| 2416 | 17 2.4781 libgtk-x11-2.0.so.0.2400.8 | ||
| 2417 | 12 1.7493 librt-2.16.so | ||
| 2418 | 3 0.4373 libXrender.so.1.3.0 | ||
| 2419 | 671 0.1153 emgd | ||
| 2420 | 411 0.0706 nf_conntrack_ipv4 | ||
| 2421 | 391 0.0672 iptable_nat | ||
| 2422 | 378 0.0650 nf_nat | ||
| 2423 | 263 0.0452 Xorg | ||
| 2424 | CPU_CLK_UNHALT...| | ||
| 2425 | samples| %| | ||
| 2426 | ------------------ | ||
| 2427 | 106 40.3042 Xorg | ||
| 2428 | 53 20.1521 libc-2.16.so | ||
| 2429 | 31 11.7871 libpixman-1.so.0.27.2 | ||
| 2430 | 26 9.8859 emgd_drv.so | ||
| 2431 | 16 6.0837 libemgdsrv_um.so.1.5.15.3226 | ||
| 2432 | 11 4.1825 libEMGD2d.so.1.5.15.3226 | ||
| 2433 | 9 3.4221 libfb.so | ||
| 2434 | 7 2.6616 libpthread-2.16.so | ||
| 2435 | 1 0.3802 libudev.so.0.9.3 | ||
| 2436 | 1 0.3802 libdrm.so.2.4.0 | ||
| 2437 | 1 0.3802 libextmod.so | ||
| 2438 | 1 0.3802 mouse_drv.so | ||
| 2439 | . | ||
| 2440 | . | ||
| 2441 | . | ||
| 2442 | 9 0.0015 connmand | ||
| 2443 | CPU_CLK_UNHALT...| | ||
| 2444 | samples| %| | ||
| 2445 | ------------------ | ||
| 2446 | 4 44.4444 libglib-2.0.so.0.3200.4 | ||
| 2447 | 2 22.2222 libpthread-2.16.so | ||
| 2448 | 1 11.1111 connmand | ||
| 2449 | 1 11.1111 libc-2.16.so | ||
| 2450 | 1 11.1111 librt-2.16.so | ||
| 2451 | 6 0.0010 oprofile-server | ||
| 2452 | CPU_CLK_UNHALT...| | ||
| 2453 | samples| %| | ||
| 2454 | ------------------ | ||
| 2455 | 3 50.0000 libc-2.16.so | ||
| 2456 | 1 16.6667 oprofile-server | ||
| 2457 | 1 16.6667 libpthread-2.16.so | ||
| 2458 | 1 16.6667 libglib-2.0.so.0.3200.4 | ||
| 2459 | 5 8.6e-04 gconfd-2 | ||
| 2460 | CPU_CLK_UNHALT...| | ||
| 2461 | samples| %| | ||
| 2462 | ------------------ | ||
| 2463 | 2 40.0000 libdbus-1.so.3.7.2 | ||
| 2464 | 2 40.0000 libglib-2.0.so.0.3200.4 | ||
| 2465 | 1 20.0000 libc-2.16.so | ||
| 2466 | </literallayout> | ||
| 2467 | The output above shows the breakdown of samples by both | ||
| 2468 | number of samples and percentage for each executable. | ||
| 2469 | Within an executable, the sample counts are broken down | ||
| 2470 | further into executable and shared libraries (DSOs) used | ||
| 2471 | by the executable. | ||
| 2472 | </para> | ||
| 2473 | |||
| 2474 | <para> | ||
| 2475 | To get even more detailed breakdowns by function, we need to | ||
| 2476 | have the full paths to the DSOs, which we can get by | ||
| 2477 | using -f with opreport: | ||
| 2478 | <literallayout class='monospaced'> | ||
| 2479 | root@crownbay:~# opreport -f | ||
| 2480 | |||
| 2481 | CPU: Intel Architectural Perfmon, speed 1.3e+06 MHz (estimated) | ||
| 2482 | Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (No unit mask) count 100000 | ||
| 2483 | CPU_CLK_UNHALT...| | ||
| 2484 | samples| %| | ||
| 2485 | |||
| 2486 | 464365 79.8156 /boot/vmlinux-3.4.11-yocto-standard | ||
| 2487 | 65108 11.1908 /usr/bin/oprofiled | ||
| 2488 | CPU_CLK_UNHALT...| | ||
| 2489 | samples| %| | ||
| 2490 | ------------------ | ||
| 2491 | 64416 98.9372 /usr/bin/oprofiled | ||
| 2492 | 692 1.0628 /lib/libc-2.16.so | ||
| 2493 | 36959 6.3526 /no-vmlinux | ||
| 2494 | 4378 0.7525 /bin/busybox | ||
| 2495 | CPU_CLK_UNHALT...| | ||
| 2496 | samples| %| | ||
| 2497 | ------------------ | ||
| 2498 | 2844 64.9612 /lib/libc-2.16.so | ||
| 2499 | 1337 30.5391 /bin/busybox | ||
| 2500 | 193 4.4084 /lib/ld-2.16.so | ||
| 2501 | 2 0.0457 /lib/libnss_compat-2.16.so | ||
| 2502 | 1 0.0228 /lib/libnsl-2.16.so | ||
| 2503 | 1 0.0228 /lib/libnss_files-2.16.so | ||
| 2504 | 4344 0.7467 /bin/bash | ||
| 2505 | CPU_CLK_UNHALT...| | ||
| 2506 | samples| %| | ||
| 2507 | ------------------ | ||
| 2508 | 2657 61.1648 /bin/bash | ||
| 2509 | 1665 38.3287 /lib/libc-2.16.so | ||
| 2510 | 18 0.4144 /lib/ld-2.16.so | ||
| 2511 | 3 0.0691 /lib/libtinfo.so.5.9 | ||
| 2512 | 1 0.0230 /lib/libdl-2.16.so | ||
| 2513 | . | ||
| 2514 | . | ||
| 2515 | . | ||
| 2516 | </literallayout> | ||
| 2517 | Using the paths shown in the above output and the -l option to | ||
| 2518 | opreport, we can see all the functions that have hits in the | ||
| 2519 | profile and their sample counts and percentages. Here's a | ||
| 2520 | portion of what we get for the kernel: | ||
| 2521 | <literallayout class='monospaced'> | ||
| 2522 | root@crownbay:~# opreport -l /boot/vmlinux-3.4.11-yocto-standard | ||
| 2523 | |||
| 2524 | CPU: Intel Architectural Perfmon, speed 1.3e+06 MHz (estimated) | ||
| 2525 | Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (No unit mask) count 100000 | ||
| 2526 | samples % symbol name | ||
| 2527 | 233981 50.3873 intel_idle | ||
| 2528 | 15437 3.3243 rb_get_reader_page | ||
| 2529 | 14503 3.1232 ring_buffer_consume | ||
| 2530 | 14092 3.0347 mutex_spin_on_owner | ||
| 2531 | 13024 2.8047 read_hpet | ||
| 2532 | 8039 1.7312 sub_preempt_count | ||
| 2533 | 7096 1.5281 ioread32 | ||
| 2534 | 6997 1.5068 add_preempt_count | ||
| 2535 | 3985 0.8582 rb_advance_reader | ||
| 2536 | 3488 0.7511 add_event_entry | ||
| 2537 | 3303 0.7113 get_parent_ip | ||
| 2538 | 3104 0.6684 rb_buffer_peek | ||
| 2539 | 2960 0.6374 op_cpu_buffer_read_entry | ||
| 2540 | 2614 0.5629 sync_buffer | ||
| 2541 | 2545 0.5481 debug_smp_processor_id | ||
| 2542 | 2456 0.5289 ohci_irq | ||
| 2543 | 2397 0.5162 memset | ||
| 2544 | 2349 0.5059 __copy_to_user_ll | ||
| 2545 | 2185 0.4705 ring_buffer_event_length | ||
| 2546 | 1918 0.4130 in_lock_functions | ||
| 2547 | 1850 0.3984 __schedule | ||
| 2548 | 1767 0.3805 __copy_from_user_ll_nozero | ||
| 2549 | 1575 0.3392 rb_event_data_length | ||
| 2550 | 1256 0.2705 memcpy | ||
| 2551 | 1233 0.2655 system_call | ||
| 2552 | 1213 0.2612 menu_select | ||
| 2553 | </literallayout> | ||
| 2554 | Notice that above we see an entry for the __copy_to_user_ll() | ||
| 2555 | function that we've looked at with other profilers as well. | ||
| 2556 | </para> | ||
| 2557 | |||
| 2558 | <para> | ||
| 2559 | Here's what we get when we do the same thing for the | ||
| 2560 | busybox executable: | ||
| 2561 | <literallayout class='monospaced'> | ||
| 2562 | CPU: Intel Architectural Perfmon, speed 1.3e+06 MHz (estimated) | ||
| 2563 | Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (No unit mask) count 100000 | ||
| 2564 | samples % image name symbol name | ||
| 2565 | 349 8.4198 busybox retrieve_file_data | ||
| 2566 | 308 7.4306 libc-2.16.so _IO_file_xsgetn | ||
| 2567 | 283 6.8275 libc-2.16.so __read_nocancel | ||
| 2568 | 235 5.6695 libc-2.16.so syscall | ||
| 2569 | 233 5.6212 libc-2.16.so clearerr | ||
| 2570 | 215 5.1870 libc-2.16.so fread | ||
| 2571 | 181 4.3667 libc-2.16.so __write_nocancel | ||
| 2572 | 158 3.8118 libc-2.16.so __underflow | ||
| 2573 | 151 3.6429 libc-2.16.so _dl_addr | ||
| 2574 | 150 3.6188 busybox progress_meter | ||
| 2575 | 150 3.6188 libc-2.16.so __poll_nocancel | ||
| 2576 | 148 3.5706 libc-2.16.so _IO_file_underflow@@GLIBC_2.1 | ||
| 2577 | 137 3.3052 busybox safe_poll | ||
| 2578 | 125 3.0157 busybox bb_progress_update | ||
| 2579 | 122 2.9433 libc-2.16.so __x86.get_pc_thunk.bx | ||
| 2580 | 95 2.2919 busybox full_write | ||
| 2581 | 81 1.9542 busybox safe_write | ||
| 2582 | 77 1.8577 busybox xwrite | ||
| 2583 | 72 1.7370 libc-2.16.so _IO_file_read | ||
| 2584 | 71 1.7129 libc-2.16.so _IO_sgetn | ||
| 2585 | 67 1.6164 libc-2.16.so poll | ||
| 2586 | 52 1.2545 libc-2.16.so _IO_switch_to_get_mode | ||
| 2587 | 45 1.0856 libc-2.16.so read | ||
| 2588 | 34 0.8203 libc-2.16.so write | ||
| 2589 | 32 0.7720 busybox monotonic_sec | ||
| 2590 | 25 0.6031 libc-2.16.so vfprintf | ||
| 2591 | 22 0.5308 busybox get_mono | ||
| 2592 | 14 0.3378 ld-2.16.so strcmp | ||
| 2593 | 14 0.3378 libc-2.16.so __x86.get_pc_thunk.cx | ||
| 2594 | . | ||
| 2595 | . | ||
| 2596 | . | ||
| 2597 | </literallayout> | ||
| 2598 | Since we recorded the profile with a callchain depth of 6, we | ||
| 2599 | should be able to see our __copy_to_user_ll() callchains in | ||
| 2600 | the output, and indeed we can if we search around a bit in | ||
| 2601 | the 'opreport --callgraph' output: | ||
| 2602 | <literallayout class='monospaced'> | ||
| 2603 | root@crownbay:~# opreport --callgraph /boot/vmlinux-3.4.11-yocto-standard | ||
| 2604 | |||
| 2605 | 392 6.9639 vmlinux-3.4.11-yocto-standard sock_aio_read | ||
| 2606 | 736 13.0751 vmlinux-3.4.11-yocto-standard __generic_file_aio_write | ||
| 2607 | 3255 57.8255 vmlinux-3.4.11-yocto-standard inet_recvmsg | ||
| 2608 | 785 0.1690 vmlinux-3.4.11-yocto-standard tcp_recvmsg | ||
| 2609 | 1790 31.7940 vmlinux-3.4.11-yocto-standard local_bh_enable | ||
| 2610 | 1238 21.9893 vmlinux-3.4.11-yocto-standard __kfree_skb | ||
| 2611 | 992 17.6199 vmlinux-3.4.11-yocto-standard lock_sock_nested | ||
| 2612 | 785 13.9432 vmlinux-3.4.11-yocto-standard tcp_recvmsg [self] | ||
| 2613 | 525 9.3250 vmlinux-3.4.11-yocto-standard release_sock | ||
| 2614 | 112 1.9893 vmlinux-3.4.11-yocto-standard tcp_cleanup_rbuf | ||
| 2615 | 72 1.2789 vmlinux-3.4.11-yocto-standard skb_copy_datagram_iovec | ||
| 2616 | |||
| 2617 | 170 0.0366 vmlinux-3.4.11-yocto-standard skb_copy_datagram_iovec | ||
| 2618 | 1491 73.3038 vmlinux-3.4.11-yocto-standard memcpy_toiovec | ||
| 2619 | 327 16.0767 vmlinux-3.4.11-yocto-standard skb_copy_datagram_iovec | ||
| 2620 | 170 8.3579 vmlinux-3.4.11-yocto-standard skb_copy_datagram_iovec [self] | ||
| 2621 | 20 0.9833 vmlinux-3.4.11-yocto-standard copy_to_user | ||
| 2622 | |||
| 2623 | 2588 98.2909 vmlinux-3.4.11-yocto-standard copy_to_user | ||
| 2624 | 2349 0.5059 vmlinux-3.4.11-yocto-standard __copy_to_user_ll | ||
| 2625 | 2349 89.2138 vmlinux-3.4.11-yocto-standard __copy_to_user_ll [self] | ||
| 2626 | 166 6.3046 vmlinux-3.4.11-yocto-standard do_page_fault | ||
| 2627 | </literallayout> | ||
| 2628 | Remember that by default OProfile sessions are cumulative | ||
| 2629 | i.e. if you start and stop a profiling session, then start a | ||
| 2630 | new one, the new one will not erase the previous run(s) but | ||
| 2631 | will build on it. If you want to restart a profile from scratch, | ||
| 2632 | you need to reset: | ||
| 2633 | <literallayout class='monospaced'> | ||
| 2634 | root@crownbay:~# opcontrol --reset | ||
| 2635 | </literallayout> | ||
| 2636 | </para> | ||
| 2637 | </section> | ||
| 2638 | |||
| 2639 | <section id='oprofileui-a-gui-for-oprofile'> | ||
| 2640 | <title>OProfileUI - A GUI for OProfile</title> | ||
| 2641 | |||
| 2642 | <para> | ||
| 2643 | Yocto also supports a graphical UI for controlling and viewing | ||
| 2644 | OProfile traces, called OProfileUI. To use it, you first need | ||
| 2645 | to clone the oprofileui git repo, then configure, build, and | ||
| 2646 | install it: | ||
| 2647 | <literallayout class='monospaced'> | ||
| 2648 | [trz@empanada tmp]$ git clone git://git.yoctoproject.org/oprofileui | ||
| 2649 | [trz@empanada tmp]$ cd oprofileui | ||
| 2650 | [trz@empanada oprofileui]$ ./autogen.sh | ||
| 2651 | [trz@empanada oprofileui]$ sudo make install | ||
| 2652 | </literallayout> | ||
| 2653 | OProfileUI replaces the 'opreport' functionality with a GUI, | ||
| 2654 | and normally doesn't require the user to use 'opcontrol' either. | ||
| 2655 | If you want to profile the kernel, however, you need to either | ||
| 2656 | use the UI to specify a vmlinux or use 'opcontrol' to specify | ||
| 2657 | it on the target: | ||
| 2658 | </para> | ||
| 2659 | |||
| 2660 | <para> | ||
| 2661 | First, on the target, check whether 'vmlinux file:' is set: | ||
| 2662 | <literallayout class='monospaced'> | ||
| 2663 | root@crownbay:~# opcontrol --status | ||
| 2664 | </literallayout> | ||
| 2665 | If not: | ||
| 2666 | <literallayout class='monospaced'> | ||
| 2667 | root@crownbay:~# opcontrol --shutdown | ||
| 2668 | root@crownbay:~# opcontrol --vmlinux=/boot/vmlinux-`uname -r` | ||
| 2669 | root@crownbay:~# opcontrol --start-daemon | ||
| 2670 | </literallayout> | ||
| 2671 | Now, start the oprofile UI on the host system: | ||
| 2672 | <literallayout class='monospaced'> | ||
| 2673 | [trz@empanada oprofileui]$ oprofile-viewer | ||
| 2674 | </literallayout> | ||
| 2675 | To run a profile on the remote system, first connect to the | ||
| 2676 | remote system by pressing the 'Connect' button and supplying | ||
| 2677 | the IP address and port of the remote system (the default | ||
| 2678 | port is 4224). | ||
| 2679 | </para> | ||
| 2680 | |||
| 2681 | <para> | ||
| 2682 | The oprofile server should automatically be started already. | ||
| 2683 | If not, the connection will fail and you either typed in the | ||
| 2684 | wrong IP address and port (see below), or you need to start | ||
| 2685 | the server yourself: | ||
| 2686 | <literallayout class='monospaced'> | ||
| 2687 | root@crownbay:~# oprofile-server | ||
| 2688 | </literallayout> | ||
| 2689 | Or, to specify a specific port: | ||
| 2690 | <literallayout class='monospaced'> | ||
| 2691 | root@crownbay:~# oprofile-server --port 8888 | ||
| 2692 | </literallayout> | ||
| 2693 | Once connected, press the 'Start' button and then run the | ||
| 2694 | wget workload on the remote system: | ||
| 2695 | <literallayout class='monospaced'> | ||
| 2696 | root@crownbay:~# rm linux-2.6.19.2.tar.bz2; wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink>; sync | ||
| 2697 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
| 2698 | linux-2.6.19.2.tar.b 100% |*******************************| 41727k 0:00:00 ETA | ||
| 2699 | </literallayout> | ||
| 2700 | Once the workload completes, press the 'Stop' button. At that | ||
| 2701 | point the OProfile viewer will download the profile files it's | ||
| 2702 | collected (this may take some time, especially if the kernel | ||
| 2703 | was profiled). While it downloads the files, you should see | ||
| 2704 | something like the following: | ||
| 2705 | </para> | ||
| 2706 | |||
| 2707 | <para> | ||
| 2708 | <imagedata fileref="figures/oprofileui-downloading.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 2709 | </para> | ||
| 2710 | |||
| 2711 | <para> | ||
| 2712 | Once the profile files have been retrieved, you should see a | ||
| 2713 | list of the processes that were profiled: | ||
| 2714 | </para> | ||
| 2715 | |||
| 2716 | <para> | ||
| 2717 | <imagedata fileref="figures/oprofileui-processes.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 2718 | </para> | ||
| 2719 | |||
| 2720 | <para> | ||
| 2721 | If you select one of them, you should see all the symbols that | ||
| 2722 | were hit during the profile. Selecting one of them will show a | ||
| 2723 | list of callers and callees of the chosen function in two | ||
| 2724 | panes below the top pane. For example, here's what we see | ||
| 2725 | when we select __copy_to_user_ll(): | ||
| 2726 | </para> | ||
| 2727 | |||
| 2728 | <para> | ||
| 2729 | <imagedata fileref="figures/oprofileui-copy-to-user.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 2730 | </para> | ||
| 2731 | |||
| 2732 | <para> | ||
| 2733 | As another example, we can look at the busybox process and see | ||
| 2734 | that the progress meter made a system call: | ||
| 2735 | </para> | ||
| 2736 | |||
| 2737 | <para> | ||
| 2738 | <imagedata fileref="figures/oprofileui-busybox.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
| 2739 | </para> | ||
| 2740 | </section> | ||
| 2741 | |||
| 2742 | <section id='oprofile-documentation'> | ||
| 2743 | <title>Documentation</title> | ||
| 2744 | |||
| 2745 | <para> | ||
| 2746 | Yocto already has some information on setting up and using | ||
| 2747 | OProfile and oprofileui. As this document doesn't cover | ||
| 2748 | everything in detail, it may be worth taking a look at the | ||
| 2749 | "<ulink url='&YOCTO_DOCS_DEV_URL;#platdev-oprofile'>Profiling with OProfile</ulink>" | ||
| 2750 | section in the Yocto Project Development Manual. | ||
| 2751 | </para> | ||
| 2752 | |||
| 2753 | <para> | ||
| 2754 | The OProfile manual can be found here: | ||
| 2755 | <ulink url='http://oprofile.sourceforge.net/doc/index.html'>OProfile manual</ulink> | ||
| 2756 | </para> | ||
| 2757 | |||
| 2758 | <para> | ||
| 2759 | The OProfile website contains links to the above manual and | ||
| 2760 | a bunch of other items including an extensive set of examples: | ||
| 2761 | <ulink url='http://oprofile.sourceforge.net/about/'>About OProfile</ulink> | ||
| 2762 | </para> | ||
| 2763 | </section> | ||
| 2764 | </section> | ||
| 2765 | |||
| 2766 | <section id='profile-manual-sysprof'> | ||
| 2767 | <title>Sysprof</title> | ||
| 2768 | |||
| 2769 | <para> | ||
| 2770 | Sysprof is a very easy to use system-wide profiler that consists | ||
| 2771 | of a single window with three panes and a few buttons which allow | ||
| 2772 | you to start, stop, and view the profile from one place. | ||
| 2773 | </para> | ||
| 2774 | |||
| 2775 | <section id='sysprof-setup'> | ||
| 2776 | <title>Setup</title> | ||
| 2777 | |||
| 2778 | <para> | ||
| 2779 | For this section, we'll assume you've already performed the | ||
| 2780 | basic setup outlined in the General Setup section. | ||
| 2781 | </para> | ||
| 2782 | |||
| 2783 | <para> | ||
| 2784 | Sysprof is a GUI-based application that runs on the target | ||
| 2785 | system. For the rest of this document we assume you've | ||
| 2786 | ssh'ed to the target and will be running Sysprof on the | ||
| 2787 | target (you can use the '-X' option to ssh and have the | ||
| 2788 | Sysprof GUI run on the target but display remotely on the | ||
| 2789 | host if you want). | ||
| 2790 | </para> | ||
| 2791 | </section> | ||
| 2792 | |||
| 2793 | <section id='sysprof-basic-usage'> | ||
| 2794 | <title>Basic Usage</title> | ||
| 2795 | |||
| 2796 | <para> | ||
| 2797 | To start profiling the system, you simply press the 'Start' | ||
| 2798 | button. To stop profiling and to start viewing the profile data | ||
| 2799 | in one easy step, press the 'Profile' button. | ||
| 2800 | </para> | ||
| 2801 | |||
| 2802 | <para> | ||
| 2803 | Once you've pressed the profile button, the three panes will | ||
| 2804 | fill up with profiling data: | ||
| 2805 | </para> | ||
| 2806 | |||
| 2807 | <para> | ||
| 2808 | <imagedata fileref="figures/sysprof-copy-to-user.png" width="6in" depth="4in" align="center" scalefit="1" /> | ||
| 2809 | </para> | ||
| 2810 | |||
| 2811 | <para> | ||
| 2812 | The left pane shows a list of functions and processes. | ||
| 2813 | Selecting one of those expands that function in the right | ||
| 2814 | pane, showing all its callees. Note that this caller-oriented | ||
| 2815 | display is essentially the inverse of perf's default | ||
| 2816 | callee-oriented callchain display. | ||
| 2817 | </para> | ||
| 2818 | |||
| 2819 | <para> | ||
| 2820 | In the screenshot above, we're focusing on __copy_to_user_ll() | ||
| 2821 | and looking up the callchain we can see that one of the callers | ||
| 2822 | of __copy_to_user_ll is sys_read() and the complete callpath | ||
| 2823 | between them. Notice that this is essentially a portion of the | ||
| 2824 | same information we saw in the perf display shown in the perf | ||
| 2825 | section of this page. | ||
| 2826 | </para> | ||
| 2827 | |||
| 2828 | <para> | ||
| 2829 | <imagedata fileref="figures/sysprof-copy-from-user.png" width="6in" depth="4in" align="center" scalefit="1" /> | ||
| 2830 | </para> | ||
| 2831 | |||
| 2832 | <para> | ||
| 2833 | Similarly, the above is a snapshot of the Sysprof display of a | ||
| 2834 | copy-from-user callchain. | ||
| 2835 | </para> | ||
| 2836 | |||
| 2837 | <para> | ||
| 2838 | Finally, looking at the third Sysprof pane in the lower left, | ||
| 2839 | we can see a list of all the callers of a particular function | ||
| 2840 | selected in the top left pane. In this case, the lower pane is | ||
| 2841 | showing all the callers of __mark_inode_dirty: | ||
| 2842 | </para> | ||
| 2843 | |||
| 2844 | <para> | ||
| 2845 | <imagedata fileref="figures/sysprof-callers.png" width="6in" depth="4in" align="center" scalefit="1" /> | ||
| 2846 | </para> | ||
| 2847 | |||
| 2848 | <para> | ||
| 2849 | Double-clicking on one of those functions will in turn change the | ||
| 2850 | focus to the selected function, and so on. | ||
| 2851 | </para> | ||
| 2852 | |||
| 2853 | <informalexample> | ||
| 2854 | <emphasis>Tying it Together:</emphasis> If you like sysprof's 'caller-oriented' | ||
| 2855 | display, you may be able to approximate it in other tools as | ||
| 2856 | well. For example, 'perf report' has the -g (--call-graph) | ||
| 2857 | option that you can experiment with; one of the options is | ||
| 2858 | 'caller' for an inverted caller-based callgraph display. | ||
| 2859 | </informalexample> | ||
| 2860 | </section> | ||
| 2861 | |||
| 2862 | <section id='sysprof-documentation'> | ||
| 2863 | <title>Documentation</title> | ||
| 2864 | |||
| 2865 | <para> | ||
| 2866 | There doesn't seem to be any documentation for Sysprof, but | ||
| 2867 | maybe that's because it's pretty self-explanatory. | ||
| 2868 | The Sysprof website, however, is here: | ||
| 2869 | <ulink url='http://sysprof.com/'>Sysprof, System-wide Performance Profiler for Linux</ulink> | ||
| 2870 | </para> | ||
| 2871 | </section> | ||
| 2872 | </section> | ||
| 2873 | |||
| 2874 | <section id='lttng-linux-trace-toolkit-next-generation'> | ||
| 2875 | <title>LTTng (Linux Trace Toolkit, next generation)</title> | ||
| 2876 | |||
| 2877 | <section id='lttng-setup'> | ||
| 2878 | <title>Setup</title> | ||
| 2879 | |||
| 2880 | <para> | ||
| 2881 | For this section, we'll assume you've already performed the | ||
| 2882 | basic setup outlined in the General Setup section. | ||
| 2883 | </para> | ||
| 2884 | |||
| 2885 | <para> | ||
| 2886 | LTTng is run on the target system by ssh'ing to it. | ||
| 2887 | However, if you want to see the traces graphically, | ||
| 2888 | install Eclipse as described in section | ||
| 2889 | "<link linkend='manually-copying-a-trace-to-the-host-and-viewing-it-in-eclipse'>Manually copying a trace to the host and viewing it in Eclipse (i.e. using Eclipse without network support)</link>" | ||
| 2890 | and follow the directions to manually copy traces to the host and | ||
| 2891 | view them in Eclipse (i.e. using Eclipse without network support). | ||
| 2892 | </para> | ||
| 2893 | |||
| 2894 | <note> | ||
| 2895 | Be sure to download and install/run the 'SR1' or later Juno release | ||
| 2896 | of Eclipse, e.g.: | ||
| 2897 | <ulink url='http://www.eclipse.org/downloads/download.php?file=/technology/epp/downloads/release/juno/SR1/eclipse-cpp-juno-SR1-linux-gtk-x86_64.tar.gz'>http://www.eclipse.org/downloads/download.php?file=/technology/epp/downloads/release/juno/SR1/eclipse-cpp-juno-SR1-linux-gtk-x86_64.tar.gz</ulink> | ||
| 2898 | </note> | ||
| 2899 | </section> | ||
| 2900 | |||
| 2901 | <section id='collecting-and-viewing-traces'> | ||
| 2902 | <title>Collecting and Viewing Traces</title> | ||
| 2903 | |||
| 2904 | <para> | ||
| 2905 | Once you've applied the above commits and built and booted your | ||
| 2906 | image (you need to build the core-image-sato-sdk image or use one of the | ||
| 2907 | other methods described in the General Setup section), you're | ||
| 2908 | ready to start tracing. | ||
| 2909 | </para> | ||
| 2910 | |||
| 2911 | <section id='collecting-and-viewing-a-trace-on-the-target-inside-a-shell'> | ||
| 2912 | <title>Collecting and viewing a trace on the target (inside a shell)</title> | ||
| 2913 | |||
| 2914 | <para> | ||
| 2915 | First, from the host, ssh to the target: | ||
| 2916 | <literallayout class='monospaced'> | ||
| 2917 | $ ssh -l root 192.168.1.47 | ||
| 2918 | The authenticity of host '192.168.1.47 (192.168.1.47)' can't be established. | ||
| 2919 | RSA key fingerprint is 23:bd:c8:b1:a8:71:52:00:ee:00:4f:64:9e:10:b9:7e. | ||
| 2920 | Are you sure you want to continue connecting (yes/no)? yes | ||
| 2921 | Warning: Permanently added '192.168.1.47' (RSA) to the list of known hosts. | ||
| 2922 | root@192.168.1.47's password: | ||
| 2923 | </literallayout> | ||
| 2924 | Once on the target, use these steps to create a trace: | ||
| 2925 | <literallayout class='monospaced'> | ||
| 2926 | root@crownbay:~# lttng create | ||
| 2927 | Spawning a session daemon | ||
| 2928 | Session auto-20121015-232120 created. | ||
| 2929 | Traces will be written in /home/root/lttng-traces/auto-20121015-232120 | ||
| 2930 | </literallayout> | ||
| 2931 | Enable the events you want to trace (in this case all | ||
| 2932 | kernel events): | ||
| 2933 | <literallayout class='monospaced'> | ||
| 2934 | root@crownbay:~# lttng enable-event --kernel --all | ||
| 2935 | All kernel events are enabled in channel channel0 | ||
| 2936 | </literallayout> | ||
| 2937 | Start the trace: | ||
| 2938 | <literallayout class='monospaced'> | ||
| 2939 | root@crownbay:~# lttng start | ||
| 2940 | Tracing started for session auto-20121015-232120 | ||
| 2941 | </literallayout> | ||
| 2942 | And then stop the trace after a while or after running | ||
| 2943 | a particular workload that you want to trace: | ||
| 2944 | <literallayout class='monospaced'> | ||
| 2945 | root@crownbay:~# lttng stop | ||
| 2946 | Tracing stopped for session auto-20121015-232120 | ||
| 2947 | </literallayout> | ||
| 2948 | You can now view the trace in text form on the target: | ||
| 2949 | <literallayout class='monospaced'> | ||
| 2950 | root@crownbay:~# lttng view | ||
| 2951 | [23:21:56.989270399] (+?.?????????) sys_geteuid: { 1 }, { } | ||
| 2952 | [23:21:56.989278081] (+0.000007682) exit_syscall: { 1 }, { ret = 0 } | ||
| 2953 | [23:21:56.989286043] (+0.000007962) sys_pipe: { 1 }, { fildes = 0xB77B9E8C } | ||
| 2954 | [23:21:56.989321802] (+0.000035759) exit_syscall: { 1 }, { ret = 0 } | ||
| 2955 | [23:21:56.989329345] (+0.000007543) sys_mmap_pgoff: { 1 }, { addr = 0x0, len = 10485760, prot = 3, flags = 131362, fd = 4294967295, pgoff = 0 } | ||
| 2956 | [23:21:56.989351694] (+0.000022349) exit_syscall: { 1 }, { ret = -1247805440 } | ||
| 2957 | [23:21:56.989432989] (+0.000081295) sys_clone: { 1 }, { clone_flags = 0x411, newsp = 0xB5EFFFE4, parent_tid = 0xFFFFFFFF, child_tid = 0x0 } | ||
| 2958 | [23:21:56.989477129] (+0.000044140) sched_stat_runtime: { 1 }, { comm = "lttng-consumerd", tid = 1193, runtime = 681660, vruntime = 43367983388 } | ||
| 2959 | [23:21:56.989486697] (+0.000009568) sched_migrate_task: { 1 }, { comm = "lttng-consumerd", tid = 1193, prio = 20, orig_cpu = 1, dest_cpu = 1 } | ||
| 2960 | [23:21:56.989508418] (+0.000021721) hrtimer_init: { 1 }, { hrtimer = 3970832076, clockid = 1, mode = 1 } | ||
| 2961 | [23:21:56.989770462] (+0.000262044) hrtimer_cancel: { 1 }, { hrtimer = 3993865440 } | ||
| 2962 | [23:21:56.989771580] (+0.000001118) hrtimer_cancel: { 0 }, { hrtimer = 3993812192 } | ||
| 2963 | [23:21:56.989776957] (+0.000005377) hrtimer_expire_entry: { 1 }, { hrtimer = 3993865440, now = 79815980007057, function = 3238465232 } | ||
| 2964 | [23:21:56.989778145] (+0.000001188) hrtimer_expire_entry: { 0 }, { hrtimer = 3993812192, now = 79815980008174, function = 3238465232 } | ||
| 2965 | [23:21:56.989791695] (+0.000013550) softirq_raise: { 1 }, { vec = 1 } | ||
| 2966 | [23:21:56.989795396] (+0.000003701) softirq_raise: { 0 }, { vec = 1 } | ||
| 2967 | [23:21:56.989800635] (+0.000005239) softirq_raise: { 0 }, { vec = 9 } | ||
| 2968 | [23:21:56.989807130] (+0.000006495) sched_stat_runtime: { 1 }, { comm = "lttng-consumerd", tid = 1193, runtime = 330710, vruntime = 43368314098 } | ||
| 2969 | [23:21:56.989809993] (+0.000002863) sched_stat_runtime: { 0 }, { comm = "lttng-sessiond", tid = 1181, runtime = 1015313, vruntime = 36976733240 } | ||
| 2970 | [23:21:56.989818514] (+0.000008521) hrtimer_expire_exit: { 0 }, { hrtimer = 3993812192 } | ||
| 2971 | [23:21:56.989819631] (+0.000001117) hrtimer_expire_exit: { 1 }, { hrtimer = 3993865440 } | ||
| 2972 | [23:21:56.989821866] (+0.000002235) hrtimer_start: { 0 }, { hrtimer = 3993812192, function = 3238465232, expires = 79815981000000, softexpires = 79815981000000 } | ||
| 2973 | [23:21:56.989822984] (+0.000001118) hrtimer_start: { 1 }, { hrtimer = 3993865440, function = 3238465232, expires = 79815981000000, softexpires = 79815981000000 } | ||
| 2974 | [23:21:56.989832762] (+0.000009778) softirq_entry: { 1 }, { vec = 1 } | ||
| 2975 | [23:21:56.989833879] (+0.000001117) softirq_entry: { 0 }, { vec = 1 } | ||
| 2976 | [23:21:56.989838069] (+0.000004190) timer_cancel: { 1 }, { timer = 3993871956 } | ||
| 2977 | [23:21:56.989839187] (+0.000001118) timer_cancel: { 0 }, { timer = 3993818708 } | ||
| 2978 | [23:21:56.989841492] (+0.000002305) timer_expire_entry: { 1 }, { timer = 3993871956, now = 79515980, function = 3238277552 } | ||
| 2979 | [23:21:56.989842819] (+0.000001327) timer_expire_entry: { 0 }, { timer = 3993818708, now = 79515980, function = 3238277552 } | ||
| 2980 | [23:21:56.989854831] (+0.000012012) sched_stat_runtime: { 1 }, { comm = "lttng-consumerd", tid = 1193, runtime = 49237, vruntime = 43368363335 } | ||
| 2981 | [23:21:56.989855949] (+0.000001118) sched_stat_runtime: { 0 }, { comm = "lttng-sessiond", tid = 1181, runtime = 45121, vruntime = 36976778361 } | ||
| 2982 | [23:21:56.989861257] (+0.000005308) sched_stat_sleep: { 1 }, { comm = "kworker/1:1", tid = 21, delay = 9451318 } | ||
| 2983 | [23:21:56.989862374] (+0.000001117) sched_stat_sleep: { 0 }, { comm = "kworker/0:0", tid = 4, delay = 9958820 } | ||
| 2984 | [23:21:56.989868241] (+0.000005867) sched_wakeup: { 0 }, { comm = "kworker/0:0", tid = 4, prio = 120, success = 1, target_cpu = 0 } | ||
| 2985 | [23:21:56.989869358] (+0.000001117) sched_wakeup: { 1 }, { comm = "kworker/1:1", tid = 21, prio = 120, success = 1, target_cpu = 1 } | ||
| 2986 | [23:21:56.989877460] (+0.000008102) timer_expire_exit: { 1 }, { timer = 3993871956 } | ||
| 2987 | [23:21:56.989878577] (+0.000001117) timer_expire_exit: { 0 }, { timer = 3993818708 } | ||
| 2988 | . | ||
| 2989 | . | ||
| 2990 | . | ||
| 2991 | </literallayout> | ||
| 2992 | You can now safely destroy the trace session (note that | ||
| 2993 | this doesn't delete the trace - it's still there | ||
| 2994 | in ~/lttng-traces): | ||
| 2995 | <literallayout class='monospaced'> | ||
| 2996 | root@crownbay:~# lttng destroy | ||
| 2997 | Session auto-20121015-232120 destroyed at /home/root | ||
| 2998 | </literallayout> | ||
| 2999 | Note that the trace is saved in a directory of the same | ||
| 3000 | name as returned by 'lttng create', under the ~/lttng-traces | ||
| 3001 | directory (note that you can change this by supplying your | ||
| 3002 | own name to 'lttng create'): | ||
| 3003 | <literallayout class='monospaced'> | ||
| 3004 | root@crownbay:~# ls -al ~/lttng-traces | ||
| 3005 | drwxrwx--- 3 root root 1024 Oct 15 23:21 . | ||
| 3006 | drwxr-xr-x 5 root root 1024 Oct 15 23:57 .. | ||
| 3007 | drwxrwx--- 3 root root 1024 Oct 15 23:21 auto-20121015-232120 | ||
| 3008 | </literallayout> | ||
| 3009 | </para> | ||
| 3010 | </section> | ||
| 3011 | |||
| 3012 | <section id='collecting-and-viewing-a-userspace-trace-on-the-target-inside-a-shell'> | ||
| 3013 | <title>Collecting and viewing a userspace trace on the target (inside a shell)</title> | ||
| 3014 | |||
| 3015 | <para> | ||
| 3016 | For LTTng userspace tracing, you need to have a properly | ||
| 3017 | instrumented userspace program. For this example, we'll use | ||
| 3018 | the 'hello' test program generated by the lttng-ust build. | ||
| 3019 | </para> | ||
| 3020 | |||
| 3021 | <para> | ||
| 3022 | The 'hello' test program isn't installed on the rootfs by | ||
| 3023 | the lttng-ust build, so we need to copy it over manually. | ||
| 3024 | First cd into the build directory that contains the hello | ||
| 3025 | executable: | ||
| 3026 | <literallayout class='monospaced'> | ||
| 3027 | $ cd build/tmp/work/core2_32-poky-linux/lttng-ust/2.0.5-r0/git/tests/hello/.libs | ||
| 3028 | </literallayout> | ||
| 3029 | Copy that over to the target machine: | ||
| 3030 | <literallayout class='monospaced'> | ||
| 3031 | $ scp hello root@192.168.1.47: | ||
| 3032 | </literallayout> | ||
| 3033 | You now have the instrumented lttng 'hello world' test | ||
| 3034 | program on the target, ready to test. | ||
| 3035 | </para> | ||
| 3036 | |||
| 3037 | <para> | ||
| 3038 | First, from the host, ssh to the target: | ||
| 3039 | <literallayout class='monospaced'> | ||
| 3040 | $ ssh -l root 192.168.1.47 | ||
| 3041 | The authenticity of host '192.168.1.47 (192.168.1.47)' can't be established. | ||
| 3042 | RSA key fingerprint is 23:bd:c8:b1:a8:71:52:00:ee:00:4f:64:9e:10:b9:7e. | ||
| 3043 | Are you sure you want to continue connecting (yes/no)? yes | ||
| 3044 | Warning: Permanently added '192.168.1.47' (RSA) to the list of known hosts. | ||
| 3045 | root@192.168.1.47's password: | ||
| 3046 | </literallayout> | ||
| 3047 | Once on the target, use these steps to create a trace: | ||
| 3048 | <literallayout class='monospaced'> | ||
| 3049 | root@crownbay:~# lttng create | ||
| 3050 | Session auto-20190303-021943 created. | ||
| 3051 | Traces will be written in /home/root/lttng-traces/auto-20190303-021943 | ||
| 3052 | </literallayout> | ||
| 3053 | Enable the events you want to trace (in this case all | ||
| 3054 | userspace events): | ||
| 3055 | <literallayout class='monospaced'> | ||
| 3056 | root@crownbay:~# lttng enable-event --userspace --all | ||
| 3057 | All UST events are enabled in channel channel0 | ||
| 3058 | </literallayout> | ||
| 3059 | Start the trace: | ||
| 3060 | <literallayout class='monospaced'> | ||
| 3061 | root@crownbay:~# lttng start | ||
| 3062 | Tracing started for session auto-20190303-021943 | ||
| 3063 | </literallayout> | ||
| 3064 | Run the instrumented hello world program: | ||
| 3065 | <literallayout class='monospaced'> | ||
| 3066 | root@crownbay:~# ./hello | ||
| 3067 | Hello, World! | ||
| 3068 | Tracing... done. | ||
| 3069 | </literallayout> | ||
| 3070 | And then stop the trace after a while or after running a | ||
| 3071 | particular workload that you want to trace: | ||
| 3072 | <literallayout class='monospaced'> | ||
| 3073 | root@crownbay:~# lttng stop | ||
| 3074 | Tracing stopped for session auto-20190303-021943 | ||
| 3075 | </literallayout> | ||
| 3076 | You can now view the trace in text form on the target: | ||
| 3077 | <literallayout class='monospaced'> | ||
| 3078 | root@crownbay:~# lttng view | ||
| 3079 | [02:31:14.906146544] (+?.?????????) hello:1424 ust_tests_hello:tptest: { cpu_id = 1 }, { intfield = 0, intfield2 = 0x0, longfield = 0, netintfield = 0, netintfieldhex = 0x0, arrfield1 = [ [0] = 1, [1] = 2, [2] = 3 ], arrfield2 = "test", _seqfield1_length = 4, seqfield1 = [ [0] = 116, [1] = 101, [2] = 115, [3] = 116 ], _seqfield2_length = 4, seqfield2 = "test", stringfield = "test", floatfield = 2222, doublefield = 2, boolfield = 1 } | ||
| 3080 | [02:31:14.906170360] (+0.000023816) hello:1424 ust_tests_hello:tptest: { cpu_id = 1 }, { intfield = 1, intfield2 = 0x1, longfield = 1, netintfield = 1, netintfieldhex = 0x1, arrfield1 = [ [0] = 1, [1] = 2, [2] = 3 ], arrfield2 = "test", _seqfield1_length = 4, seqfield1 = [ [0] = 116, [1] = 101, [2] = 115, [3] = 116 ], _seqfield2_length = 4, seqfield2 = "test", stringfield = "test", floatfield = 2222, doublefield = 2, boolfield = 1 } | ||
| 3081 | [02:31:14.906183140] (+0.000012780) hello:1424 ust_tests_hello:tptest: { cpu_id = 1 }, { intfield = 2, intfield2 = 0x2, longfield = 2, netintfield = 2, netintfieldhex = 0x2, arrfield1 = [ [0] = 1, [1] = 2, [2] = 3 ], arrfield2 = "test", _seqfield1_length = 4, seqfield1 = [ [0] = 116, [1] = 101, [2] = 115, [3] = 116 ], _seqfield2_length = 4, seqfield2 = "test", stringfield = "test", floatfield = 2222, doublefield = 2, boolfield = 1 } | ||
| 3082 | [02:31:14.906194385] (+0.000011245) hello:1424 ust_tests_hello:tptest: { cpu_id = 1 }, { intfield = 3, intfield2 = 0x3, longfield = 3, netintfield = 3, netintfieldhex = 0x3, arrfield1 = [ [0] = 1, [1] = 2, [2] = 3 ], arrfield2 = "test", _seqfield1_length = 4, seqfield1 = [ [0] = 116, [1] = 101, [2] = 115, [3] = 116 ], _seqfield2_length = 4, seqfield2 = "test", stringfield = "test", floatfield = 2222, doublefield = 2, boolfield = 1 } | ||
| 3083 | . | ||
| 3084 | . | ||
| 3085 | . | ||
| 3086 | </literallayout> | ||
| 3087 | You can now safely destroy the trace session (note that | ||
| 3088 | this doesn't delete the trace - it's still | ||
| 3089 | there in ~/lttng-traces): | ||
| 3090 | <literallayout class='monospaced'> | ||
| 3091 | root@crownbay:~# lttng destroy | ||
| 3092 | Session auto-20190303-021943 destroyed at /home/root | ||
| 3093 | </literallayout> | ||
| 3094 | </para> | ||
| 3095 | </section> | ||
| 3096 | |||
| 3097 | <section id='manually-copying-a-trace-to-the-host-and-viewing-it-in-eclipse'> | ||
| 3098 | <title>Manually copying a trace to the host and viewing it in Eclipse (i.e. using Eclipse without network support)</title> | ||
| 3099 | |||
| 3100 | <para> | ||
| 3101 | If you already have an LTTng trace on a remote target and | ||
| 3102 | would like to view it in Eclipse on the host, you can easily | ||
| 3103 | copy it from the target to the host and import it into | ||
| 3104 | Eclipse to view it using the LTTng Eclipse plug-in already | ||
| 3105 | bundled in Eclipse (Juno SR1 or later). | ||
| 3106 | </para> | ||
| 3107 | |||
| 3108 | <para> | ||
| 3109 | Using the trace we created in the previous section, archive | ||
| 3110 | it and copy it to your host system: | ||
| 3111 | <literallayout class='monospaced'> | ||
| 3112 | root@crownbay:~/lttng-traces# tar zcvf auto-20121015-232120.tar.gz auto-20121015-232120 | ||
| 3113 | auto-20121015-232120/ | ||
| 3114 | auto-20121015-232120/kernel/ | ||
| 3115 | auto-20121015-232120/kernel/metadata | ||
| 3116 | auto-20121015-232120/kernel/channel0_1 | ||
| 3117 | auto-20121015-232120/kernel/channel0_0 | ||
| 3118 | |||
| 3119 | $ scp root@192.168.1.47:lttng-traces/auto-20121015-232120.tar.gz . | ||
| 3120 | root@192.168.1.47's password: | ||
| 3121 | auto-20121015-232120.tar.gz 100% 1566KB 1.5MB/s 00:01 | ||
| 3122 | </literallayout> | ||
| 3123 | Unarchive it on the host: | ||
| 3124 | <literallayout class='monospaced'> | ||
| 3125 | $ gunzip -c auto-20121015-232120.tar.gz | tar xvf - | ||
| 3126 | auto-20121015-232120/ | ||
| 3127 | auto-20121015-232120/kernel/ | ||
| 3128 | auto-20121015-232120/kernel/metadata | ||
| 3129 | auto-20121015-232120/kernel/channel0_1 | ||
| 3130 | auto-20121015-232120/kernel/channel0_0 | ||
| 3131 | </literallayout> | ||
| 3132 | We can now import the trace into Eclipse and view it: | ||
| 3133 | <orderedlist> | ||
| 3134 | <listitem><para>First, start eclipse and open the | ||
| 3135 | 'LTTng Kernel' perspective by selecting the following | ||
| 3136 | menu item: | ||
| 3137 | <literallayout class='monospaced'> | ||
| 3138 | Window | Open Perspective | Other... | ||
| 3139 | </literallayout></para></listitem> | ||
| 3140 | <listitem><para>In the dialog box that opens, select | ||
| 3141 | 'LTTng Kernel' from the list.</para></listitem> | ||
| 3142 | <listitem><para>Back at the main menu, select the | ||
| 3143 | following menu item: | ||
| 3144 | <literallayout class='monospaced'> | ||
| 3145 | File | New | Project... | ||
| 3146 | </literallayout></para></listitem> | ||
| 3147 | <listitem><para>In the dialog box that opens, select | ||
| 3148 | the 'Tracing | Tracing Project' wizard and press | ||
| 3149 | 'Next>'.</para></listitem> | ||
| 3150 | <listitem><para>Give the project a name and press | ||
| 3151 | 'Finish'.</para></listitem> | ||
| 3152 | <listitem><para>In the 'Project Explorer' pane under | ||
| 3153 | the project you created, right click on the | ||
| 3154 | 'Traces' item.</para></listitem> | ||
| 3155 | <listitem><para>Select 'Import...' and in the dialog | ||
| 3156 | that's displayed:</para></listitem> | ||
| 3157 | <listitem><para>Browse the filesystem and | ||
| 3158 | select the 'kernel' directory containing the trace | ||
| 3159 | you copied from the target | ||
| 3160 | e.g. auto-20121015-232120/kernel</para></listitem> | ||
| 3161 | <listitem><para>'Checkmark' the directory in the tree | ||
| 3162 | that's displayed for the trace</para></listitem> | ||
| 3163 | <listitem><para>Below that, select 'Common Trace Format: | ||
| 3164 | Kernel Trace' for the 'Trace Type'</para></listitem> | ||
| 3165 | <listitem><para>Press 'Finish' to close the dialog | ||
| 3166 | </para></listitem> | ||
| 3167 | <listitem><para>Back in the 'Project Explorer' pane, | ||
| 3168 | double-click on the 'kernel' item for the | ||
| 3169 | trace you just imported under 'Traces' | ||
| 3170 | </para></listitem> | ||
| 3171 | </orderedlist> | ||
| 3172 | You should now see your trace data displayed graphically | ||
| 3173 | in several different views in Eclipse: | ||
| 3174 | </para> | ||
| 3175 | |||
| 3176 | <para> | ||
| 3177 | <imagedata fileref="figures/lttngmain0.png" width="6in" depth="6in" align="center" scalefit="1" /> | ||
| 3178 | </para> | ||
| 3179 | |||
| 3180 | <para> | ||
| 3181 | You can access extensive help information on how to use | ||
| 3182 | the LTTng plug-in to search and analyze captured traces via | ||
| 3183 | the Eclipse help system: | ||
| 3184 | <literallayout class='monospaced'> | ||
| 3185 | Help | Help Contents | LTTng Plug-in User Guide | ||
| 3186 | </literallayout> | ||
| 3187 | </para> | ||
| 3188 | </section> | ||
| 3189 | |||
| 3190 | <section id='collecting-and-viewing-a-trace-in-eclipse'> | ||
| 3191 | <title>Collecting and viewing a trace in Eclipse</title> | ||
| 3192 | |||
| 3193 | <note> | ||
| 3194 | This section on collecting traces remotely doesn't currently | ||
| 3195 | work because of Eclipse 'RSE' connectivity problems. Manually | ||
| 3196 | tracing on the target, copying the trace files to the host, | ||
| 3197 | and viewing the trace in Eclipse on the host as outlined in | ||
| 3198 | previous steps does work however - please use the manual | ||
| 3199 | steps outlined above to view traces in Eclipse. | ||
| 3200 | </note> | ||
| 3201 | |||
| 3202 | <para> | ||
| 3203 | In order to trace a remote target, you also need to add | ||
| 3204 | a 'tracing' group on the target and connect as a user | ||
| 3205 | who's part of that group, e.g.: | ||
| 3206 | <literallayout class='monospaced'> | ||
| 3207 | # adduser tomz | ||
| 3208 | # groupadd -r tracing | ||
| 3209 | # usermod -a -G tracing tomz | ||
| 3210 | </literallayout> | ||
| 3211 | <orderedlist> | ||
| 3212 | <listitem><para>First, start eclipse and open the | ||
| 3213 | 'LTTng Kernel' perspective by selecting the following | ||
| 3214 | menu item: | ||
| 3215 | <literallayout class='monospaced'> | ||
| 3216 | Window | Open Perspective | Other... | ||
| 3217 | </literallayout></para></listitem> | ||
| 3218 | <listitem><para>In the dialog box that opens, select | ||
| 3219 | 'LTTng Kernel' from the list.</para></listitem> | ||
| 3220 | <listitem><para>Back at the main menu, select the | ||
| 3221 | following menu item: | ||
| 3222 | <literallayout class='monospaced'> | ||
| 3223 | File | New | Project... | ||
| 3224 | </literallayout></para></listitem> | ||
| 3225 | <listitem><para>In the dialog box that opens, select | ||
| 3226 | the 'Tracing | Tracing Project' wizard and | ||
| 3227 | press 'Next>'.</para></listitem> | ||
| 3228 | <listitem><para>Give the project a name and press | ||
| 3229 | 'Finish'. That should result in an entry in the | ||
| 3230 | 'Project' subwindow.</para></listitem> | ||
| 3231 | <listitem><para>In the 'Control' subwindow just below | ||
| 3232 | it, press 'New Connection'.</para></listitem> | ||
| 3233 | <listitem><para>Add a new connection, giving it the | ||
| 3234 | hostname or IP address of the target system. | ||
| 3235 | </para></listitem> | ||
| 3236 | <listitem><para>Provide the username and password | ||
| 3237 | of a qualified user (a member of the 'tracing' group) | ||
| 3238 | or root account on the target system. | ||
| 3239 | </para></listitem> | ||
| 3240 | <listitem><para>Provide appropriate answers to whatever | ||
| 3241 | else is asked for e.g. 'secure storage password' | ||
| 3242 | can be anything you want. | ||
| 3243 | If you get an 'RSE Error' it may be due to proxies. | ||
| 3244 | It may be possible to get around the problem by | ||
| 3245 | changing the following setting: | ||
| 3246 | <literallayout class='monospaced'> | ||
| 3247 | Window | Preferences | Network Connections | ||
| 3248 | </literallayout> | ||
| 3249 | Switch 'Active Provider' to 'Direct'. | ||
| 3250 | </para></listitem> | ||
| 3251 | </orderedlist> | ||
| 3252 | </para> | ||
| 3253 | </section> | ||
| 3254 | </section> | ||
| 3255 | |||
| 3256 | <section id='lltng-documentation'> | ||
| 3257 | <title>Documentation</title> | ||
| 3258 | |||
| 3259 | <para> | ||
| 3260 | There doesn't seem to be any current documentation covering | ||
| 3261 | LTTng 2.0, but maybe that's because the project is in transition. | ||
| 3262 | The LTTng 2.0 website, however, is here: | ||
| 3263 | <ulink url='http://lttng.org/lttng2.0'>LTTng Project</ulink> | ||
| 3264 | </para> | ||
| 3265 | |||
| 3266 | <para> | ||
| 3267 | You can access extensive help information on how to use the | ||
| 3268 | LTTng plug-in to search and analyze captured traces via the | ||
| 3269 | Eclipse help system: | ||
| 3270 | <literallayout class='monospaced'> | ||
| 3271 | Help | Help Contents | LTTng Plug-in User Guide | ||
| 3272 | </literallayout> | ||
| 3273 | </para> | ||
| 3274 | </section> | ||
| 3275 | </section> | ||
| 3276 | |||
| 3277 | <section id='profile-manual-blktrace'> | ||
| 3278 | <title>blktrace</title> | ||
| 3279 | |||
| 3280 | <para> | ||
| 3281 | blktrace is a tool for tracing and reporting low-level disk I/O. | ||
| 3282 | blktrace provides the tracing half of the equation; its output can | ||
| 3283 | be piped into the blkparse program, which renders the data in a | ||
| 3284 | human-readable form and does some basic analysis. | ||
| 3285 | </para> | ||
| 3286 | |||
| 3287 | <section id='blktrace-setup'> | ||
| 3288 | <title>Setup</title> | ||
| 3289 | |||
| 3290 | <para> | ||
| 3291 | For this section, we'll assume you've already performed the | ||
| 3292 | basic setup outlined in the | ||
| 3293 | "<link linkend='profile-manual-general-setup'>General Setup</link>" | ||
| 3294 | section. | ||
| 3295 | </para> | ||
| 3296 | |||
| 3297 | <para> | ||
| 3298 | blktrace is an application that runs on the target system. | ||
| 3299 | You can run the entire blktrace and blkparse pipeline on the | ||
| 3300 | target, or you can run blktrace in 'listen' mode on the target | ||
| 3301 | and have blktrace and blkparse collect and analyze the data on | ||
| 3302 | the host (see the | ||
| 3303 | "<link linkend='using-blktrace-remotely'>Using blktrace Remotely</link>" | ||
| 3304 | section below). | ||
| 3305 | For the rest of this section we assume you've ssh'ed to the | ||
| 3306 | host and will be running blktrace on the target. | ||
| 3307 | </para> | ||
| 3308 | </section> | ||
| 3309 | |||
| 3310 | <section id='blktrace-basic-usage'> | ||
| 3311 | <title>Basic Usage</title> | ||
| 3312 | |||
| 3313 | <para> | ||
| 3314 | To record a trace, simply run the 'blktrace' command, giving it | ||
| 3315 | the name of the block device you want to trace activity on: | ||
| 3316 | <literallayout class='monospaced'> | ||
| 3317 | root@crownbay:~# blktrace /dev/sdc | ||
| 3318 | </literallayout> | ||
| 3319 | In another shell, execute a workload you want to trace. | ||
| 3320 | <literallayout class='monospaced'> | ||
| 3321 | root@crownbay:/media/sdc# rm linux-2.6.19.2.tar.bz2; wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink>; sync | ||
| 3322 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
| 3323 | linux-2.6.19.2.tar.b 100% |*******************************| 41727k 0:00:00 ETA | ||
| 3324 | </literallayout> | ||
| 3325 | Press Ctrl-C in the blktrace shell to stop the trace. It will | ||
| 3326 | display how many events were logged, along with the per-cpu file | ||
| 3327 | sizes (blktrace records traces in per-cpu kernel buffers and | ||
| 3328 | simply dumps them to userspace for blkparse to merge and sort | ||
| 3329 | later). | ||
| 3330 | <literallayout class='monospaced'> | ||
| 3331 | ^C=== sdc === | ||
| 3332 | CPU 0: 7082 events, 332 KiB data | ||
| 3333 | CPU 1: 1578 events, 74 KiB data | ||
| 3334 | Total: 8660 events (dropped 0), 406 KiB data | ||
| 3335 | </literallayout> | ||
| 3336 | If you examine the files saved to disk, you see multiple files, | ||
| 3337 | one per CPU and with the device name as the first part of the | ||
| 3338 | filename: | ||
| 3339 | <literallayout class='monospaced'> | ||
| 3340 | root@crownbay:~# ls -al | ||
| 3341 | drwxr-xr-x 6 root root 1024 Oct 27 22:39 . | ||
| 3342 | drwxr-sr-x 4 root root 1024 Oct 26 18:24 .. | ||
| 3343 | -rw-r--r-- 1 root root 339938 Oct 27 22:40 sdc.blktrace.0 | ||
| 3344 | -rw-r--r-- 1 root root 75753 Oct 27 22:40 sdc.blktrace.1 | ||
| 3345 | </literallayout> | ||
| 3346 | To view the trace events, simply invoke 'blkparse' in the | ||
| 3347 | directory containing the trace files, giving it the device name | ||
| 3348 | that forms the first part of the filenames: | ||
| 3349 | <literallayout class='monospaced'> | ||
| 3350 | root@crownbay:~# blkparse sdc | ||
| 3351 | |||
| 3352 | 8,32 1 1 0.000000000 1225 Q WS 3417048 + 8 [jbd2/sdc-8] | ||
| 3353 | 8,32 1 2 0.000025213 1225 G WS 3417048 + 8 [jbd2/sdc-8] | ||
| 3354 | 8,32 1 3 0.000033384 1225 P N [jbd2/sdc-8] | ||
| 3355 | 8,32 1 4 0.000043301 1225 I WS 3417048 + 8 [jbd2/sdc-8] | ||
| 3356 | 8,32 1 0 0.000057270 0 m N cfq1225 insert_request | ||
| 3357 | 8,32 1 0 0.000064813 0 m N cfq1225 add_to_rr | ||
| 3358 | 8,32 1 5 0.000076336 1225 U N [jbd2/sdc-8] 1 | ||
| 3359 | 8,32 1 0 0.000088559 0 m N cfq workload slice:150 | ||
| 3360 | 8,32 1 0 0.000097359 0 m N cfq1225 set_active wl_prio:0 wl_type:1 | ||
| 3361 | 8,32 1 0 0.000104063 0 m N cfq1225 Not idling. st->count:1 | ||
| 3362 | 8,32 1 0 0.000112584 0 m N cfq1225 fifo= (null) | ||
| 3363 | 8,32 1 0 0.000118730 0 m N cfq1225 dispatch_insert | ||
| 3364 | 8,32 1 0 0.000127390 0 m N cfq1225 dispatched a request | ||
| 3365 | 8,32 1 0 0.000133536 0 m N cfq1225 activate rq, drv=1 | ||
| 3366 | 8,32 1 6 0.000136889 1225 D WS 3417048 + 8 [jbd2/sdc-8] | ||
| 3367 | 8,32 1 7 0.000360381 1225 Q WS 3417056 + 8 [jbd2/sdc-8] | ||
| 3368 | 8,32 1 8 0.000377422 1225 G WS 3417056 + 8 [jbd2/sdc-8] | ||
| 3369 | 8,32 1 9 0.000388876 1225 P N [jbd2/sdc-8] | ||
| 3370 | 8,32 1 10 0.000397886 1225 Q WS 3417064 + 8 [jbd2/sdc-8] | ||
| 3371 | 8,32 1 11 0.000404800 1225 M WS 3417064 + 8 [jbd2/sdc-8] | ||
| 3372 | 8,32 1 12 0.000412343 1225 Q WS 3417072 + 8 [jbd2/sdc-8] | ||
| 3373 | 8,32 1 13 0.000416533 1225 M WS 3417072 + 8 [jbd2/sdc-8] | ||
| 3374 | 8,32 1 14 0.000422121 1225 Q WS 3417080 + 8 [jbd2/sdc-8] | ||
| 3375 | 8,32 1 15 0.000425194 1225 M WS 3417080 + 8 [jbd2/sdc-8] | ||
| 3376 | 8,32 1 16 0.000431968 1225 Q WS 3417088 + 8 [jbd2/sdc-8] | ||
| 3377 | 8,32 1 17 0.000435251 1225 M WS 3417088 + 8 [jbd2/sdc-8] | ||
| 3378 | 8,32 1 18 0.000440279 1225 Q WS 3417096 + 8 [jbd2/sdc-8] | ||
| 3379 | 8,32 1 19 0.000443911 1225 M WS 3417096 + 8 [jbd2/sdc-8] | ||
| 3380 | 8,32 1 20 0.000450336 1225 Q WS 3417104 + 8 [jbd2/sdc-8] | ||
| 3381 | 8,32 1 21 0.000454038 1225 M WS 3417104 + 8 [jbd2/sdc-8] | ||
| 3382 | 8,32 1 22 0.000462070 1225 Q WS 3417112 + 8 [jbd2/sdc-8] | ||
| 3383 | 8,32 1 23 0.000465422 1225 M WS 3417112 + 8 [jbd2/sdc-8] | ||
| 3384 | 8,32 1 24 0.000474222 1225 I WS 3417056 + 64 [jbd2/sdc-8] | ||
| 3385 | 8,32 1 0 0.000483022 0 m N cfq1225 insert_request | ||
| 3386 | 8,32 1 25 0.000489727 1225 U N [jbd2/sdc-8] 1 | ||
| 3387 | 8,32 1 0 0.000498457 0 m N cfq1225 Not idling. st->count:1 | ||
| 3388 | 8,32 1 0 0.000503765 0 m N cfq1225 dispatch_insert | ||
| 3389 | 8,32 1 0 0.000512914 0 m N cfq1225 dispatched a request | ||
| 3390 | 8,32 1 0 0.000518851 0 m N cfq1225 activate rq, drv=2 | ||
| 3391 | . | ||
| 3392 | . | ||
| 3393 | . | ||
| 3394 | 8,32 0 0 58.515006138 0 m N cfq3551 complete rqnoidle 1 | ||
| 3395 | 8,32 0 2024 58.516603269 3 C WS 3156992 + 16 [0] | ||
| 3396 | 8,32 0 0 58.516626736 0 m N cfq3551 complete rqnoidle 1 | ||
| 3397 | 8,32 0 0 58.516634558 0 m N cfq3551 arm_idle: 8 group_idle: 0 | ||
| 3398 | 8,32 0 0 58.516636933 0 m N cfq schedule dispatch | ||
| 3399 | 8,32 1 0 58.516971613 0 m N cfq3551 slice expired t=0 | ||
| 3400 | 8,32 1 0 58.516982089 0 m N cfq3551 sl_used=13 disp=6 charge=13 iops=0 sect=80 | ||
| 3401 | 8,32 1 0 58.516985511 0 m N cfq3551 del_from_rr | ||
| 3402 | 8,32 1 0 58.516990819 0 m N cfq3551 put_queue | ||
| 3403 | |||
| 3404 | CPU0 (sdc): | ||
| 3405 | Reads Queued: 0, 0KiB Writes Queued: 331, 26,284KiB | ||
| 3406 | Read Dispatches: 0, 0KiB Write Dispatches: 485, 40,484KiB | ||
| 3407 | Reads Requeued: 0 Writes Requeued: 0 | ||
| 3408 | Reads Completed: 0, 0KiB Writes Completed: 511, 41,000KiB | ||
| 3409 | Read Merges: 0, 0KiB Write Merges: 13, 160KiB | ||
| 3410 | Read depth: 0 Write depth: 2 | ||
| 3411 | IO unplugs: 23 Timer unplugs: 0 | ||
| 3412 | CPU1 (sdc): | ||
| 3413 | Reads Queued: 0, 0KiB Writes Queued: 249, 15,800KiB | ||
| 3414 | Read Dispatches: 0, 0KiB Write Dispatches: 42, 1,600KiB | ||
| 3415 | Reads Requeued: 0 Writes Requeued: 0 | ||
| 3416 | Reads Completed: 0, 0KiB Writes Completed: 16, 1,084KiB | ||
| 3417 | Read Merges: 0, 0KiB Write Merges: 40, 276KiB | ||
| 3418 | Read depth: 0 Write depth: 2 | ||
| 3419 | IO unplugs: 30 Timer unplugs: 1 | ||
| 3420 | |||
| 3421 | Total (sdc): | ||
| 3422 | Reads Queued: 0, 0KiB Writes Queued: 580, 42,084KiB | ||
| 3423 | Read Dispatches: 0, 0KiB Write Dispatches: 527, 42,084KiB | ||
| 3424 | Reads Requeued: 0 Writes Requeued: 0 | ||
| 3425 | Reads Completed: 0, 0KiB Writes Completed: 527, 42,084KiB | ||
| 3426 | Read Merges: 0, 0KiB Write Merges: 53, 436KiB | ||
| 3427 | IO unplugs: 53 Timer unplugs: 1 | ||
| 3428 | |||
| 3429 | Throughput (R/W): 0KiB/s / 719KiB/s | ||
| 3430 | Events (sdc): 6,592 entries | ||
| 3431 | Skips: 0 forward (0 - 0.0%) | ||
| 3432 | Input file sdc.blktrace.0 added | ||
| 3433 | Input file sdc.blktrace.1 added | ||
| 3434 | </literallayout> | ||
| 3435 | The report shows each event that was found in the blktrace data, | ||
| 3436 | along with a summary of the overall block I/O traffic during | ||
| 3437 | the run. You can look at the | ||
| 3438 | <ulink url='http://linux.die.net/man/1/blkparse'>blkparse</ulink> | ||
| 3439 | manpage to learn the | ||
| 3440 | meaning of each field displayed in the trace listing. | ||
| 3441 | </para> | ||
| 3442 | |||
| 3443 | <section id='blktrace-live-mode'> | ||
| 3444 | <title>Live Mode</title> | ||
| 3445 | |||
| 3446 | <para> | ||
| 3447 | blktrace and blkparse are designed from the ground up to | ||
| 3448 | be able to operate together in a 'pipe mode' where the | ||
| 3449 | stdout of blktrace can be fed directly into the stdin of | ||
| 3450 | blkparse: | ||
| 3451 | <literallayout class='monospaced'> | ||
| 3452 | root@crownbay:~# blktrace /dev/sdc -o - | blkparse -i - | ||
| 3453 | </literallayout> | ||
| 3454 | This enables long-lived tracing sessions to run without | ||
| 3455 | writing anything to disk, and allows the user to look for | ||
| 3456 | certain conditions in the trace data in 'real-time' by | ||
| 3457 | viewing the trace output as it scrolls by on the screen or | ||
| 3458 | by passing it along to yet another program in the pipeline | ||
| 3459 | such as grep which can be used to identify and capture | ||
| 3460 | conditions of interest. | ||
| 3461 | </para> | ||
| 3462 | |||
| 3463 | <para> | ||
| 3464 | There's actually another blktrace command that implements | ||
| 3465 | the above pipeline as a single command, so the user doesn't | ||
| 3466 | have to bother typing in the above command sequence: | ||
| 3467 | <literallayout class='monospaced'> | ||
| 3468 | root@crownbay:~# btrace /dev/sdc | ||
| 3469 | </literallayout> | ||
| 3470 | </para> | ||
| 3471 | </section> | ||
| 3472 | |||
| 3473 | <section id='using-blktrace-remotely'> | ||
| 3474 | <title>Using blktrace Remotely</title> | ||
| 3475 | |||
| 3476 | <para> | ||
| 3477 | Because blktrace traces block I/O and at the same time | ||
| 3478 | normally writes its trace data to a block device, and | ||
| 3479 | in general because it's not really a great idea to make | ||
| 3480 | the device being traced the same as the device the tracer | ||
| 3481 | writes to, blktrace provides a way to trace without | ||
| 3482 | perturbing the traced device at all by providing native | ||
| 3483 | support for sending all trace data over the network. | ||
| 3484 | </para> | ||
| 3485 | |||
| 3486 | <para> | ||
| 3487 | To have blktrace operate in this mode, start blktrace on | ||
| 3488 | the target system being traced with the -l option, along with | ||
| 3489 | the device to trace: | ||
| 3490 | <literallayout class='monospaced'> | ||
| 3491 | root@crownbay:~# blktrace -l /dev/sdc | ||
| 3492 | server: waiting for connections... | ||
| 3493 | </literallayout> | ||
| 3494 | On the host system, use the -h option to connect to the | ||
| 3495 | target system, also passing it the device to trace: | ||
| 3496 | <literallayout class='monospaced'> | ||
| 3497 | $ blktrace -d /dev/sdc -h 192.168.1.43 | ||
| 3498 | blktrace: connecting to 192.168.1.43 | ||
| 3499 | blktrace: connected! | ||
| 3500 | </literallayout> | ||
| 3501 | On the target system, you should see this: | ||
| 3502 | <literallayout class='monospaced'> | ||
| 3503 | server: connection from 192.168.1.43 | ||
| 3504 | </literallayout> | ||
| 3505 | In another shell, execute a workload you want to trace. | ||
| 3506 | <literallayout class='monospaced'> | ||
| 3507 | root@crownbay:/media/sdc# rm linux-2.6.19.2.tar.bz2; wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink>; sync | ||
| 3508 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
| 3509 | linux-2.6.19.2.tar.b 100% |*******************************| 41727k 0:00:00 ETA | ||
| 3510 | </literallayout> | ||
| 3511 | When it's done, do a Ctrl-C on the host system to | ||
| 3512 | stop the trace: | ||
| 3513 | <literallayout class='monospaced'> | ||
| 3514 | ^C=== sdc === | ||
| 3515 | CPU 0: 7691 events, 361 KiB data | ||
| 3516 | CPU 1: 4109 events, 193 KiB data | ||
| 3517 | Total: 11800 events (dropped 0), 554 KiB data | ||
| 3518 | </literallayout> | ||
| 3519 | On the target system, you should also see a trace | ||
| 3520 | summary for the trace just ended: | ||
| 3521 | <literallayout class='monospaced'> | ||
| 3522 | server: end of run for 192.168.1.43:sdc | ||
| 3523 | === sdc === | ||
| 3524 | CPU 0: 7691 events, 361 KiB data | ||
| 3525 | CPU 1: 4109 events, 193 KiB data | ||
| 3526 | Total: 11800 events (dropped 0), 554 KiB data | ||
| 3527 | </literallayout> | ||
| 3528 | The blktrace instance on the host will save the target | ||
| 3529 | output inside a hostname-timestamp directory: | ||
| 3530 | <literallayout class='monospaced'> | ||
| 3531 | $ ls -al | ||
| 3532 | drwxr-xr-x 10 root root 1024 Oct 28 02:40 . | ||
| 3533 | drwxr-sr-x 4 root root 1024 Oct 26 18:24 .. | ||
| 3534 | drwxr-xr-x 2 root root 1024 Oct 28 02:40 192.168.1.43-2012-10-28-02:40:56 | ||
| 3535 | </literallayout> | ||
| 3536 | cd into that directory to see the output files: | ||
| 3537 | <literallayout class='monospaced'> | ||
| 3538 | $ ls -l | ||
| 3539 | -rw-r--r-- 1 root root 369193 Oct 28 02:44 sdc.blktrace.0 | ||
| 3540 | -rw-r--r-- 1 root root 197278 Oct 28 02:44 sdc.blktrace.1 | ||
| 3541 | </literallayout> | ||
| 3542 | And run blkparse on the host system using the device name: | ||
| 3543 | <literallayout class='monospaced'> | ||
| 3544 | $ blkparse sdc | ||
| 3545 | |||
| 3546 | 8,32 1 1 0.000000000 1263 Q RM 6016 + 8 [ls] | ||
| 3547 | 8,32 1 0 0.000036038 0 m N cfq1263 alloced | ||
| 3548 | 8,32 1 2 0.000039390 1263 G RM 6016 + 8 [ls] | ||
| 3549 | 8,32 1 3 0.000049168 1263 I RM 6016 + 8 [ls] | ||
| 3550 | 8,32 1 0 0.000056152 0 m N cfq1263 insert_request | ||
| 3551 | 8,32 1 0 0.000061600 0 m N cfq1263 add_to_rr | ||
| 3552 | 8,32 1 0 0.000075498 0 m N cfq workload slice:300 | ||
| 3553 | . | ||
| 3554 | . | ||
| 3555 | . | ||
| 3556 | 8,32 0 0 177.266385696 0 m N cfq1267 arm_idle: 8 group_idle: 0 | ||
| 3557 | 8,32 0 0 177.266388140 0 m N cfq schedule dispatch | ||
| 3558 | 8,32 1 0 177.266679239 0 m N cfq1267 slice expired t=0 | ||
| 3559 | 8,32 1 0 177.266689297 0 m N cfq1267 sl_used=9 disp=6 charge=9 iops=0 sect=56 | ||
| 3560 | 8,32 1 0 177.266692649 0 m N cfq1267 del_from_rr | ||
| 3561 | 8,32 1 0 177.266696560 0 m N cfq1267 put_queue | ||
| 3562 | |||
| 3563 | CPU0 (sdc): | ||
| 3564 | Reads Queued: 0, 0KiB Writes Queued: 270, 21,708KiB | ||
| 3565 | Read Dispatches: 59, 2,628KiB Write Dispatches: 495, 39,964KiB | ||
| 3566 | Reads Requeued: 0 Writes Requeued: 0 | ||
| 3567 | Reads Completed: 90, 2,752KiB Writes Completed: 543, 41,596KiB | ||
| 3568 | Read Merges: 0, 0KiB Write Merges: 9, 344KiB | ||
| 3569 | Read depth: 2 Write depth: 2 | ||
| 3570 | IO unplugs: 20 Timer unplugs: 1 | ||
| 3571 | CPU1 (sdc): | ||
| 3572 | Reads Queued: 688, 2,752KiB Writes Queued: 381, 20,652KiB | ||
| 3573 | Read Dispatches: 31, 124KiB Write Dispatches: 59, 2,396KiB | ||
| 3574 | Reads Requeued: 0 Writes Requeued: 0 | ||
| 3575 | Reads Completed: 0, 0KiB Writes Completed: 11, 764KiB | ||
| 3576 | Read Merges: 598, 2,392KiB Write Merges: 88, 448KiB | ||
| 3577 | Read depth: 2 Write depth: 2 | ||
| 3578 | IO unplugs: 52 Timer unplugs: 0 | ||
| 3579 | |||
| 3580 | Total (sdc): | ||
| 3581 | Reads Queued: 688, 2,752KiB Writes Queued: 651, 42,360KiB | ||
| 3582 | Read Dispatches: 90, 2,752KiB Write Dispatches: 554, 42,360KiB | ||
| 3583 | Reads Requeued: 0 Writes Requeued: 0 | ||
| 3584 | Reads Completed: 90, 2,752KiB Writes Completed: 554, 42,360KiB | ||
| 3585 | Read Merges: 598, 2,392KiB Write Merges: 97, 792KiB | ||
| 3586 | IO unplugs: 72 Timer unplugs: 1 | ||
| 3587 | |||
| 3588 | Throughput (R/W): 15KiB/s / 238KiB/s | ||
| 3589 | Events (sdc): 9,301 entries | ||
| 3590 | Skips: 0 forward (0 - 0.0%) | ||
| 3591 | </literallayout> | ||
| 3592 | You should see the trace events and summary just as | ||
| 3593 | you would have if you'd run the same command on the target. | ||
| 3594 | </para> | ||
| 3595 | </section> | ||
| 3596 | |||
| 3597 | <section id='tracing-block-io-via-ftrace'> | ||
| 3598 | <title>Tracing Block I/O via 'ftrace'</title> | ||
| 3599 | |||
| 3600 | <para> | ||
| 3601 | It's also possible to trace block I/O using only the | ||
| 3602 | <link linkend='the-trace-events-subsystem'>trace events subsystem</link>, | ||
| 3603 | which can be useful for casual tracing | ||
| 3604 | if you don't want to bother dealing with the userspace tools. | ||
| 3605 | </para> | ||
| 3606 | |||
| 3607 | <para> | ||
| 3608 | To enable tracing for a given device, use | ||
| 3609 | /sys/block/xxx/trace/enable, where xxx is the device name. | ||
| 3610 | This for example enables tracing for /dev/sdc: | ||
| 3611 | <literallayout class='monospaced'> | ||
| 3612 | root@crownbay:/sys/kernel/debug/tracing# echo 1 > /sys/block/sdc/trace/enable | ||
| 3613 | </literallayout> | ||
| 3614 | Once you've selected the device(s) you want to trace, | ||
| 3615 | selecting the 'blk' tracer will turn the blk tracer on: | ||
| 3616 | <literallayout class='monospaced'> | ||
| 3617 | root@crownbay:/sys/kernel/debug/tracing# cat available_tracers | ||
| 3618 | blk function_graph function nop | ||
| 3619 | |||
| 3620 | root@crownbay:/sys/kernel/debug/tracing# echo blk > current_tracer | ||
| 3621 | </literallayout> | ||
| 3622 | Execute the workload you're interested in: | ||
| 3623 | <literallayout class='monospaced'> | ||
| 3624 | root@crownbay:/sys/kernel/debug/tracing# cat /media/sdc/testfile.txt | ||
| 3625 | </literallayout> | ||
| 3626 | And look at the output (note here that we're using | ||
| 3627 | 'trace_pipe' instead of trace to capture this trace - | ||
| 3628 | this allows us to wait around on the pipe for data to | ||
| 3629 | appear): | ||
| 3630 | <literallayout class='monospaced'> | ||
| 3631 | root@crownbay:/sys/kernel/debug/tracing# cat trace_pipe | ||
| 3632 | cat-3587 [001] d..1 3023.276361: 8,32 Q R 1699848 + 8 [cat] | ||
| 3633 | cat-3587 [001] d..1 3023.276410: 8,32 m N cfq3587 alloced | ||
| 3634 | cat-3587 [001] d..1 3023.276415: 8,32 G R 1699848 + 8 [cat] | ||
| 3635 | cat-3587 [001] d..1 3023.276424: 8,32 P N [cat] | ||
| 3636 | cat-3587 [001] d..2 3023.276432: 8,32 I R 1699848 + 8 [cat] | ||
| 3637 | cat-3587 [001] d..1 3023.276439: 8,32 m N cfq3587 insert_request | ||
| 3638 | cat-3587 [001] d..1 3023.276445: 8,32 m N cfq3587 add_to_rr | ||
| 3639 | cat-3587 [001] d..2 3023.276454: 8,32 U N [cat] 1 | ||
| 3640 | cat-3587 [001] d..1 3023.276464: 8,32 m N cfq workload slice:150 | ||
| 3641 | cat-3587 [001] d..1 3023.276471: 8,32 m N cfq3587 set_active wl_prio:0 wl_type:2 | ||
| 3642 | cat-3587 [001] d..1 3023.276478: 8,32 m N cfq3587 fifo= (null) | ||
| 3643 | cat-3587 [001] d..1 3023.276483: 8,32 m N cfq3587 dispatch_insert | ||
| 3644 | cat-3587 [001] d..1 3023.276490: 8,32 m N cfq3587 dispatched a request | ||
| 3645 | cat-3587 [001] d..1 3023.276497: 8,32 m N cfq3587 activate rq, drv=1 | ||
| 3646 | cat-3587 [001] d..2 3023.276500: 8,32 D R 1699848 + 8 [cat] | ||
| 3647 | </literallayout> | ||
| 3648 | And this turns off tracing for the specified device: | ||
| 3649 | <literallayout class='monospaced'> | ||
| 3650 | root@crownbay:/sys/kernel/debug/tracing# echo 0 > /sys/block/sdc/trace/enable | ||
| 3651 | </literallayout> | ||
| 3652 | </para> | ||
| 3653 | </section> | ||
| 3654 | </section> | ||
| 3655 | |||
| 3656 | <section id='blktrace-documentation'> | ||
| 3657 | <title>Documentation</title> | ||
| 3658 | |||
| 3659 | <para> | ||
| 3660 | Online versions of the man pages for the commands discussed | ||
| 3661 | in this section can be found here: | ||
| 3662 | <itemizedlist> | ||
| 3663 | <listitem><para><ulink url='http://linux.die.net/man/8/blktrace'>http://linux.die.net/man/8/blktrace</ulink> | ||
| 3664 | </para></listitem> | ||
| 3665 | <listitem><para><ulink url='http://linux.die.net/man/1/blkparse'>http://linux.die.net/man/1/blkparse</ulink> | ||
| 3666 | </para></listitem> | ||
| 3667 | <listitem><para><ulink url='http://linux.die.net/man/8/btrace'>http://linux.die.net/man/8/btrace</ulink> | ||
| 3668 | </para></listitem> | ||
| 3669 | </itemizedlist> | ||
| 3670 | </para> | ||
| 3671 | |||
| 3672 | <para> | ||
| 3673 | The above manpages, along with manpages for the other | ||
| 3674 | blktrace utilities (btt, blkiomon, etc.) can be found in the | ||
| 3675 | /doc directory of the blktrace tools git repo: | ||
| 3676 | <literallayout class='monospaced'> | ||
| 3677 | $ git clone git://git.kernel.dk/blktrace.git | ||
| 3678 | </literallayout> | ||
| 3679 | </para> | ||
| 3680 | </section> | ||
| 3681 | </section> | ||
| 3682 | </chapter> | ||
| 3683 | <!-- | ||
| 3684 | vim: expandtab tw=80 ts=4 | ||
| 3685 | --> | ||
diff --git a/documentation/profile-manual/profile-manual.xml b/documentation/profile-manual/profile-manual.xml new file mode 100644 index 0000000000..ed1176f326 --- /dev/null +++ b/documentation/profile-manual/profile-manual.xml | |||
| @@ -0,0 +1,90 @@ | |||
| 1 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
| 2 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" | ||
| 3 | [<!ENTITY % poky SYSTEM "../poky.ent"> %poky; ] > | ||
| 4 | |||
| 5 | <book id='profile-manual' lang='en' | ||
| 6 | xmlns:xi="http://www.w3.org/2003/XInclude" | ||
| 7 | xmlns="http://docbook.org/ns/docbook" | ||
| 8 | > | ||
| 9 | <bookinfo> | ||
| 10 | |||
| 11 | <mediaobject> | ||
| 12 | <imageobject> | ||
| 13 | <imagedata fileref='figures/profile-title.png' | ||
| 14 | format='SVG' | ||
| 15 | align='left' scalefit='1' width='100%'/> | ||
| 16 | </imageobject> | ||
| 17 | </mediaobject> | ||
| 18 | |||
| 19 | <title> | ||
| 20 | Yocto Project Profiling and Tracing Manual | ||
| 21 | </title> | ||
| 22 | |||
| 23 | <authorgroup> | ||
| 24 | <author> | ||
| 25 | <firstname>Tom</firstname> <surname>Zanussi</surname> | ||
| 26 | <affiliation> | ||
| 27 | <orgname>Intel Corporation</orgname> | ||
| 28 | </affiliation> | ||
| 29 | <email>tom.zanussi@intel.com</email> | ||
| 30 | </author> | ||
| 31 | </authorgroup> | ||
| 32 | |||
| 33 | <revhistory> | ||
| 34 | <revision> | ||
| 35 | <revnumber>1.4</revnumber> | ||
| 36 | <date>April 2013</date> | ||
| 37 | <revremark>Released with the Yocto Project 1.4 Release.</revremark> | ||
| 38 | </revision> | ||
| 39 | <revision> | ||
| 40 | <revnumber>1.5</revnumber> | ||
| 41 | <date>October 2013</date> | ||
| 42 | <revremark>Released with the Yocto Project 1.5 Release.</revremark> | ||
| 43 | </revision> | ||
| 44 | <revision> | ||
| 45 | <revnumber>1.5.1</revnumber> | ||
| 46 | <date>January 2014</date> | ||
| 47 | <revremark>Released with the Yocto Project 1.5.1 Release.</revremark> | ||
| 48 | </revision> | ||
| 49 | <revision> | ||
| 50 | <revnumber>1.6</revnumber> | ||
| 51 | <date>April 2014</date> | ||
| 52 | <revremark>Released with the Yocto Project 1.6 Release.</revremark> | ||
| 53 | </revision> | ||
| 54 | </revhistory> | ||
| 55 | |||
| 56 | <copyright> | ||
| 57 | <year>©RIGHT_YEAR;</year> | ||
| 58 | <holder>Linux Foundation</holder> | ||
| 59 | </copyright> | ||
| 60 | |||
| 61 | <legalnotice> | ||
| 62 | <para> | ||
| 63 | Permission is granted to copy, distribute and/or modify this document under | ||
| 64 | the terms of the <ulink type="http" url="http://creativecommons.org/licenses/by-sa/2.0/uk/"> | ||
| 65 | Creative Commons Attribution-Share Alike 2.0 UK: England & Wales</ulink> as published by | ||
| 66 | Creative Commons. | ||
| 67 | </para> | ||
| 68 | |||
| 69 | <note> | ||
| 70 | For the latest version of this manual associated with this | ||
| 71 | Yocto Project release, see the | ||
| 72 | <ulink url='&YOCTO_DOCS_PROF_URL;'>Yocto Project Profiling and Tracing Manual</ulink> | ||
| 73 | from the Yocto Project website. | ||
| 74 | </note> | ||
| 75 | </legalnotice> | ||
| 76 | |||
| 77 | </bookinfo> | ||
| 78 | |||
| 79 | <xi:include href="profile-manual-intro.xml"/> | ||
| 80 | |||
| 81 | <xi:include href="profile-manual-arch.xml"/> | ||
| 82 | |||
| 83 | <xi:include href="profile-manual-usage.xml"/> | ||
| 84 | |||
| 85 | <xi:include href="profile-manual-examples.xml"/> | ||
| 86 | |||
| 87 | </book> | ||
| 88 | <!-- | ||
| 89 | vim: expandtab tw=80 ts=4 | ||
| 90 | --> | ||
