diff options
Diffstat (limited to 'documentation/profile-manual')
-rw-r--r-- | documentation/profile-manual/profile-manual-arch.xml | 46 | ||||
-rw-r--r-- | documentation/profile-manual/profile-manual-customization.xsl | 29 | ||||
-rw-r--r-- | documentation/profile-manual/profile-manual-examples.xml | 40 | ||||
-rw-r--r-- | documentation/profile-manual/profile-manual-intro.xml | 107 | ||||
-rw-r--r-- | documentation/profile-manual/profile-manual-style.css | 987 | ||||
-rw-r--r-- | documentation/profile-manual/profile-manual-usage.xml | 2986 | ||||
-rwxr-xr-x | documentation/profile-manual/profile-manual.xml | 180 |
7 files changed, 0 insertions, 4375 deletions
diff --git a/documentation/profile-manual/profile-manual-arch.xml b/documentation/profile-manual/profile-manual-arch.xml deleted file mode 100644 index 8eb7bbfabd..0000000000 --- a/documentation/profile-manual/profile-manual-arch.xml +++ /dev/null | |||
@@ -1,46 +0,0 @@ | |||
1 | <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
2 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" | ||
3 | [<!ENTITY % poky SYSTEM "../poky.ent"> %poky; ] > | ||
4 | <!--SPDX-License-Identifier: CC-BY-2.0-UK--> | ||
5 | |||
6 | <chapter id='profile-manual-arch'> | ||
7 | |||
8 | <title>Overall Architecture of the Linux Tracing and Profiling Tools</title> | ||
9 | |||
10 | <section id='architecture-of-the-tracing-and-profiling-tools'> | ||
11 | <title>Architecture of the Tracing and Profiling Tools</title> | ||
12 | |||
13 | <para> | ||
14 | It may seem surprising to see a section covering an 'overall architecture' | ||
15 | for what seems to be a random collection of tracing tools that together | ||
16 | make up the Linux tracing and profiling space. | ||
17 | The fact is, however, that in recent years this seemingly disparate | ||
18 | set of tools has started to converge on a 'core' set of underlying | ||
19 | mechanisms: | ||
20 | </para> | ||
21 | |||
22 | <para> | ||
23 | <itemizedlist> | ||
24 | <listitem>static tracepoints</listitem> | ||
25 | <listitem>dynamic tracepoints | ||
26 | <itemizedlist> | ||
27 | <listitem>kprobes</listitem> | ||
28 | <listitem>uprobes</listitem> | ||
29 | </itemizedlist> | ||
30 | </listitem> | ||
31 | <listitem>the perf_events subsystem</listitem> | ||
32 | <listitem>debugfs</listitem> | ||
33 | </itemizedlist> | ||
34 | </para> | ||
35 | |||
36 | <informalexample> | ||
37 | <emphasis>Tying it Together:</emphasis> Rather than enumerating here how each tool makes use of | ||
38 | these common mechanisms, textboxes like this will make note of the | ||
39 | specific usages in each tool as they come up in the course | ||
40 | of the text. | ||
41 | </informalexample> | ||
42 | </section> | ||
43 | </chapter> | ||
44 | <!-- | ||
45 | vim: expandtab tw=80 ts=4 | ||
46 | --> | ||
diff --git a/documentation/profile-manual/profile-manual-customization.xsl b/documentation/profile-manual/profile-manual-customization.xsl deleted file mode 100644 index d995e0b3cb..0000000000 --- a/documentation/profile-manual/profile-manual-customization.xsl +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | <?xml version='1.0'?> | ||
2 | <!--SPDX-License-Identifier: CC-BY-2.0-UK--> | ||
3 | |||
4 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns="http://www.w3.org/1999/xhtml" xmlns:fo="http://www.w3.org/1999/XSL/Format" version="1.0"> | ||
5 | |||
6 | <xsl:import href="http://downloads.yoctoproject.org/mirror/docbook-mirror/docbook-xsl-1.76.1/xhtml/docbook.xsl" /> | ||
7 | |||
8 | <!-- | ||
9 | |||
10 | <xsl:import href="../template/1.76.1/docbook-xsl-1.76.1/xhtml/docbook.xsl" /> | ||
11 | |||
12 | <xsl:import href="http://docbook.sourceforge.net/release/xsl/1.76.1/xhtml/docbook.xsl" /> | ||
13 | |||
14 | --> | ||
15 | |||
16 | <xsl:include href="../template/permalinks.xsl"/> | ||
17 | <xsl:include href="../template/section.title.xsl"/> | ||
18 | <xsl:include href="../template/component.title.xsl"/> | ||
19 | <xsl:include href="../template/division.title.xsl"/> | ||
20 | <xsl:include href="../template/formal.object.heading.xsl"/> | ||
21 | |||
22 | <xsl:param name="html.stylesheet" select="'profile-manual-style.css'" /> | ||
23 | <xsl:param name="chapter.autolabel" select="1" /> | ||
24 | <xsl:param name="appendix.autolabel" select="A" /> | ||
25 | <xsl:param name="section.autolabel" select="1" /> | ||
26 | <xsl:param name="section.label.includes.component.label" select="1" /> | ||
27 | <xsl:param name="generate.id.attributes" select="1" /> | ||
28 | |||
29 | </xsl:stylesheet> | ||
diff --git a/documentation/profile-manual/profile-manual-examples.xml b/documentation/profile-manual/profile-manual-examples.xml deleted file mode 100644 index 91e06fcd1c..0000000000 --- a/documentation/profile-manual/profile-manual-examples.xml +++ /dev/null | |||
@@ -1,40 +0,0 @@ | |||
1 | <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
2 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" | ||
3 | [<!ENTITY % poky SYSTEM "../poky.ent"> %poky; ] > | ||
4 | <!--SPDX-License-Identifier: CC-BY-2.0-UK--> | ||
5 | |||
6 | <chapter id='profile-manual-examples'> | ||
7 | |||
8 | <title>Real-World Examples</title> | ||
9 | |||
10 | <para> | ||
11 | This chapter contains real-world examples. | ||
12 | </para> | ||
13 | |||
14 | <section id='slow-write-speed-on-live-images'> | ||
15 | <title>Slow Write Speed on Live Images</title> | ||
16 | |||
17 | <para> | ||
18 | In one of our previous releases (denzil), users noticed that booting | ||
19 | off of a live image and writing to disk was noticeably slower. | ||
20 | This included the boot itself, especially the first one, since first | ||
21 | boots tend to do a significant amount of writing due to certain | ||
22 | post-install scripts. | ||
23 | </para> | ||
24 | |||
25 | <para> | ||
26 | The problem (and solution) was discovered by using the Yocto tracing | ||
27 | tools, in this case 'perf stat', 'perf script', 'perf record' | ||
28 | and 'perf report'. | ||
29 | </para> | ||
30 | |||
31 | <para> | ||
32 | See all the unvarnished details of how this bug was diagnosed and | ||
33 | solved here: Yocto Bug #3049 | ||
34 | </para> | ||
35 | </section> | ||
36 | |||
37 | </chapter> | ||
38 | <!-- | ||
39 | vim: expandtab tw=80 ts=4 | ||
40 | --> | ||
diff --git a/documentation/profile-manual/profile-manual-intro.xml b/documentation/profile-manual/profile-manual-intro.xml deleted file mode 100644 index a2d2f80ec0..0000000000 --- a/documentation/profile-manual/profile-manual-intro.xml +++ /dev/null | |||
@@ -1,107 +0,0 @@ | |||
1 | <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
2 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" | ||
3 | [<!ENTITY % poky SYSTEM "../poky.ent"> %poky; ] > | ||
4 | <!--SPDX-License-Identifier: CC-BY-2.0-UK--> | ||
5 | |||
6 | <chapter id='profile-manual-intro'> | ||
7 | |||
8 | <title>Yocto Project Profiling and Tracing Manual</title> | ||
9 | <section id='profile-intro'> | ||
10 | <title>Introduction</title> | ||
11 | |||
12 | <para> | ||
13 | Yocto bundles a number of tracing and profiling tools - this 'HOWTO' | ||
14 | describes their basic usage and shows by example how to make use | ||
15 | of them to examine application and system behavior. | ||
16 | </para> | ||
17 | |||
18 | <para> | ||
19 | The tools presented are for the most part completely open-ended and | ||
20 | have quite good and/or extensive documentation of their own which | ||
21 | can be used to solve just about any problem you might come across | ||
22 | in Linux. | ||
23 | Each section that describes a particular tool has links to that | ||
24 | tool's documentation and website. | ||
25 | </para> | ||
26 | |||
27 | <para> | ||
28 | The purpose of this 'HOWTO' is to present a set of common and | ||
29 | generally useful tracing and profiling idioms along with their | ||
30 | application (as appropriate) to each tool, in the context of a | ||
31 | general-purpose 'drill-down' methodology that can be applied | ||
32 | to solving a large number (90%?) of problems. | ||
33 | For help with more advanced usages and problems, please see | ||
34 | the documentation and/or websites listed for each tool. | ||
35 | </para> | ||
36 | |||
37 | <para> | ||
38 | The final section of this 'HOWTO' is a collection of real-world | ||
39 | examples which we'll be continually adding to as we solve more | ||
40 | problems using the tools - feel free to add your own examples | ||
41 | to the list! | ||
42 | </para> | ||
43 | </section> | ||
44 | |||
45 | <section id='profile-manual-general-setup'> | ||
46 | <title>General Setup</title> | ||
47 | |||
48 | <para> | ||
49 | Most of the tools are available only in 'sdk' images or in images | ||
50 | built after adding 'tools-profile' to your local.conf. | ||
51 | So, in order to be able to access all of the tools described here, | ||
52 | please first build and boot an 'sdk' image e.g. | ||
53 | <literallayout class='monospaced'> | ||
54 | $ bitbake core-image-sato-sdk | ||
55 | </literallayout> | ||
56 | or alternatively by adding 'tools-profile' to the | ||
57 | EXTRA_IMAGE_FEATURES line in your local.conf: | ||
58 | <literallayout class='monospaced'> | ||
59 | EXTRA_IMAGE_FEATURES = "debug-tweaks tools-profile" | ||
60 | </literallayout> | ||
61 | If you use the 'tools-profile' method, you don't need to build an | ||
62 | sdk image - the tracing and profiling tools will be included in | ||
63 | non-sdk images as well e.g.: | ||
64 | <literallayout class='monospaced'> | ||
65 | $ bitbake core-image-sato | ||
66 | </literallayout> | ||
67 | <note><para> | ||
68 | By default, the Yocto build system strips symbols from the | ||
69 | binaries it packages, which makes it difficult to use some | ||
70 | of the tools. | ||
71 | </para><para>You can prevent that by setting the | ||
72 | <ulink url='&YOCTO_DOCS_REF_URL;#var-INHIBIT_PACKAGE_STRIP'><filename>INHIBIT_PACKAGE_STRIP</filename></ulink> | ||
73 | variable to "1" in your | ||
74 | <filename>local.conf</filename> when you build the image: | ||
75 | </para> | ||
76 | </note> | ||
77 | <literallayout class='monospaced'> | ||
78 | INHIBIT_PACKAGE_STRIP = "1" | ||
79 | </literallayout> | ||
80 | The above setting will noticeably increase the size of your image. | ||
81 | </para> | ||
82 | |||
83 | <para> | ||
84 | If you've already built a stripped image, you can generate | ||
85 | debug packages (xxx-dbg) which you can manually install as | ||
86 | needed. | ||
87 | </para> | ||
88 | |||
89 | <para> | ||
90 | To generate debug info for packages, you can add dbg-pkgs to | ||
91 | EXTRA_IMAGE_FEATURES in local.conf. For example: | ||
92 | <literallayout class='monospaced'> | ||
93 | EXTRA_IMAGE_FEATURES = "debug-tweaks tools-profile dbg-pkgs" | ||
94 | </literallayout> | ||
95 | Additionally, in order to generate the right type of | ||
96 | debuginfo, we also need to set | ||
97 | <ulink url='&YOCTO_DOCS_REF_URL;#var-PACKAGE_DEBUG_SPLIT_STYLE'><filename>PACKAGE_DEBUG_SPLIT_STYLE</filename></ulink> | ||
98 | in the <filename>local.conf</filename> file: | ||
99 | <literallayout class='monospaced'> | ||
100 | PACKAGE_DEBUG_SPLIT_STYLE = 'debug-file-directory' | ||
101 | </literallayout> | ||
102 | </para> | ||
103 | </section> | ||
104 | </chapter> | ||
105 | <!-- | ||
106 | vim: expandtab tw=80 ts=4 | ||
107 | --> | ||
diff --git a/documentation/profile-manual/profile-manual-style.css b/documentation/profile-manual/profile-manual-style.css deleted file mode 100644 index 8502c11090..0000000000 --- a/documentation/profile-manual/profile-manual-style.css +++ /dev/null | |||
@@ -1,987 +0,0 @@ | |||
1 | /* | ||
2 | |||
3 | SPDX-License-Identifier: CC-BY-2.0-UK | ||
4 | |||
5 | Generic XHTML / DocBook XHTML CSS Stylesheet. | ||
6 | |||
7 | Browser wrangling and typographic design by | ||
8 | Oyvind Kolas / pippin@gimp.org | ||
9 | |||
10 | Customised for Poky by | ||
11 | Matthew Allum / mallum@o-hand.com | ||
12 | |||
13 | Thanks to: | ||
14 | Liam R. E. Quin | ||
15 | William Skaggs | ||
16 | Jakub Steiner | ||
17 | |||
18 | Structure | ||
19 | --------- | ||
20 | |||
21 | The stylesheet is divided into the following sections: | ||
22 | |||
23 | Positioning | ||
24 | Margins, paddings, width, font-size, clearing. | ||
25 | Decorations | ||
26 | Borders, style | ||
27 | Colors | ||
28 | Colors | ||
29 | Graphics | ||
30 | Graphical backgrounds | ||
31 | Nasty IE tweaks | ||
32 | Workarounds needed to make it work in internet explorer, | ||
33 | currently makes the stylesheet non validating, but up until | ||
34 | this point it is validating. | ||
35 | Mozilla extensions | ||
36 | Transparency for footer | ||
37 | Rounded corners on boxes | ||
38 | |||
39 | */ | ||
40 | |||
41 | |||
42 | /*************** / | ||
43 | / Positioning / | ||
44 | / ***************/ | ||
45 | |||
46 | body { | ||
47 | font-family: Verdana, Sans, sans-serif; | ||
48 | |||
49 | min-width: 640px; | ||
50 | width: 80%; | ||
51 | margin: 0em auto; | ||
52 | padding: 2em 5em 5em 5em; | ||
53 | color: #333; | ||
54 | } | ||
55 | |||
56 | h1,h2,h3,h4,h5,h6,h7 { | ||
57 | font-family: Arial, Sans; | ||
58 | color: #00557D; | ||
59 | clear: both; | ||
60 | } | ||
61 | |||
62 | h1 { | ||
63 | font-size: 2em; | ||
64 | text-align: left; | ||
65 | padding: 0em 0em 0em 0em; | ||
66 | margin: 2em 0em 0em 0em; | ||
67 | } | ||
68 | |||
69 | h2.subtitle { | ||
70 | margin: 0.10em 0em 3.0em 0em; | ||
71 | padding: 0em 0em 0em 0em; | ||
72 | font-size: 1.8em; | ||
73 | padding-left: 20%; | ||
74 | font-weight: normal; | ||
75 | font-style: italic; | ||
76 | } | ||
77 | |||
78 | h2 { | ||
79 | margin: 2em 0em 0.66em 0em; | ||
80 | padding: 0.5em 0em 0em 0em; | ||
81 | font-size: 1.5em; | ||
82 | font-weight: bold; | ||
83 | } | ||
84 | |||
85 | h3.subtitle { | ||
86 | margin: 0em 0em 1em 0em; | ||
87 | padding: 0em 0em 0em 0em; | ||
88 | font-size: 142.14%; | ||
89 | text-align: right; | ||
90 | } | ||
91 | |||
92 | h3 { | ||
93 | margin: 1em 0em 0.5em 0em; | ||
94 | padding: 1em 0em 0em 0em; | ||
95 | font-size: 140%; | ||
96 | font-weight: bold; | ||
97 | } | ||
98 | |||
99 | h4 { | ||
100 | margin: 1em 0em 0.5em 0em; | ||
101 | padding: 1em 0em 0em 0em; | ||
102 | font-size: 120%; | ||
103 | font-weight: bold; | ||
104 | } | ||
105 | |||
106 | h5 { | ||
107 | margin: 1em 0em 0.5em 0em; | ||
108 | padding: 1em 0em 0em 0em; | ||
109 | font-size: 110%; | ||
110 | font-weight: bold; | ||
111 | } | ||
112 | |||
113 | h6 { | ||
114 | margin: 1em 0em 0em 0em; | ||
115 | padding: 1em 0em 0em 0em; | ||
116 | font-size: 110%; | ||
117 | font-weight: bold; | ||
118 | } | ||
119 | |||
120 | .authorgroup { | ||
121 | background-color: transparent; | ||
122 | background-repeat: no-repeat; | ||
123 | padding-top: 256px; | ||
124 | background-image: url("figures/profile-title.png"); | ||
125 | background-position: left top; | ||
126 | margin-top: -256px; | ||
127 | padding-right: 50px; | ||
128 | margin-left: 0px; | ||
129 | text-align: right; | ||
130 | width: 740px; | ||
131 | } | ||
132 | |||
133 | h3.author { | ||
134 | margin: 0em 0me 0em 0em; | ||
135 | padding: 0em 0em 0em 0em; | ||
136 | font-weight: normal; | ||
137 | font-size: 100%; | ||
138 | color: #333; | ||
139 | clear: both; | ||
140 | } | ||
141 | |||
142 | .author tt.email { | ||
143 | font-size: 66%; | ||
144 | } | ||
145 | |||
146 | .titlepage hr { | ||
147 | width: 0em; | ||
148 | clear: both; | ||
149 | } | ||
150 | |||
151 | .revhistory { | ||
152 | padding-top: 2em; | ||
153 | clear: both; | ||
154 | } | ||
155 | |||
156 | .toc, | ||
157 | .list-of-tables, | ||
158 | .list-of-examples, | ||
159 | .list-of-figures { | ||
160 | padding: 1.33em 0em 2.5em 0em; | ||
161 | color: #00557D; | ||
162 | } | ||
163 | |||
164 | .toc p, | ||
165 | .list-of-tables p, | ||
166 | .list-of-figures p, | ||
167 | .list-of-examples p { | ||
168 | padding: 0em 0em 0em 0em; | ||
169 | padding: 0em 0em 0.3em; | ||
170 | margin: 1.5em 0em 0em 0em; | ||
171 | } | ||
172 | |||
173 | .toc p b, | ||
174 | .list-of-tables p b, | ||
175 | .list-of-figures p b, | ||
176 | .list-of-examples p b{ | ||
177 | font-size: 100.0%; | ||
178 | font-weight: bold; | ||
179 | } | ||
180 | |||
181 | .toc dl, | ||
182 | .list-of-tables dl, | ||
183 | .list-of-figures dl, | ||
184 | .list-of-examples dl { | ||
185 | margin: 0em 0em 0.5em 0em; | ||
186 | padding: 0em 0em 0em 0em; | ||
187 | } | ||
188 | |||
189 | .toc dt { | ||
190 | margin: 0em 0em 0em 0em; | ||
191 | padding: 0em 0em 0em 0em; | ||
192 | } | ||
193 | |||
194 | .toc dd { | ||
195 | margin: 0em 0em 0em 2.6em; | ||
196 | padding: 0em 0em 0em 0em; | ||
197 | } | ||
198 | |||
199 | div.glossary dl, | ||
200 | div.variablelist dl { | ||
201 | } | ||
202 | |||
203 | .glossary dl dt, | ||
204 | .variablelist dl dt, | ||
205 | .variablelist dl dt span.term { | ||
206 | font-weight: normal; | ||
207 | width: 20em; | ||
208 | text-align: right; | ||
209 | } | ||
210 | |||
211 | .variablelist dl dt { | ||
212 | margin-top: 0.5em; | ||
213 | } | ||
214 | |||
215 | .glossary dl dd, | ||
216 | .variablelist dl dd { | ||
217 | margin-top: -1em; | ||
218 | margin-left: 25.5em; | ||
219 | } | ||
220 | |||
221 | .glossary dd p, | ||
222 | .variablelist dd p { | ||
223 | margin-top: 0em; | ||
224 | margin-bottom: 1em; | ||
225 | } | ||
226 | |||
227 | |||
228 | div.calloutlist table td { | ||
229 | padding: 0em 0em 0em 0em; | ||
230 | margin: 0em 0em 0em 0em; | ||
231 | } | ||
232 | |||
233 | div.calloutlist table td p { | ||
234 | margin-top: 0em; | ||
235 | margin-bottom: 1em; | ||
236 | } | ||
237 | |||
238 | div p.copyright { | ||
239 | text-align: left; | ||
240 | } | ||
241 | |||
242 | div.legalnotice p.legalnotice-title { | ||
243 | margin-bottom: 0em; | ||
244 | } | ||
245 | |||
246 | p { | ||
247 | line-height: 1.5em; | ||
248 | margin-top: 0em; | ||
249 | |||
250 | } | ||
251 | |||
252 | dl { | ||
253 | padding-top: 0em; | ||
254 | } | ||
255 | |||
256 | hr { | ||
257 | border: solid 1px; | ||
258 | } | ||
259 | |||
260 | |||
261 | .mediaobject, | ||
262 | .mediaobjectco { | ||
263 | text-align: center; | ||
264 | } | ||
265 | |||
266 | img { | ||
267 | border: none; | ||
268 | } | ||
269 | |||
270 | ul { | ||
271 | padding: 0em 0em 0em 1.5em; | ||
272 | } | ||
273 | |||
274 | ul li { | ||
275 | padding: 0em 0em 0em 0em; | ||
276 | } | ||
277 | |||
278 | ul li p { | ||
279 | text-align: left; | ||
280 | } | ||
281 | |||
282 | table { | ||
283 | width :100%; | ||
284 | } | ||
285 | |||
286 | th { | ||
287 | padding: 0.25em; | ||
288 | text-align: left; | ||
289 | font-weight: normal; | ||
290 | vertical-align: top; | ||
291 | } | ||
292 | |||
293 | td { | ||
294 | padding: 0.25em; | ||
295 | vertical-align: top; | ||
296 | } | ||
297 | |||
298 | p a[id] { | ||
299 | margin: 0px; | ||
300 | padding: 0px; | ||
301 | display: inline; | ||
302 | background-image: none; | ||
303 | } | ||
304 | |||
305 | a { | ||
306 | text-decoration: underline; | ||
307 | color: #444; | ||
308 | } | ||
309 | |||
310 | pre { | ||
311 | overflow: auto; | ||
312 | } | ||
313 | |||
314 | a:hover { | ||
315 | text-decoration: underline; | ||
316 | /*font-weight: bold;*/ | ||
317 | } | ||
318 | |||
319 | /* This style defines how the permalink character | ||
320 | appears by itself and when hovered over with | ||
321 | the mouse. */ | ||
322 | |||
323 | [alt='Permalink'] { color: #eee; } | ||
324 | [alt='Permalink']:hover { color: black; } | ||
325 | |||
326 | |||
327 | div.informalfigure, | ||
328 | div.informalexample, | ||
329 | div.informaltable, | ||
330 | div.figure, | ||
331 | div.table, | ||
332 | div.example { | ||
333 | margin: 1em 0em; | ||
334 | padding: 1em; | ||
335 | page-break-inside: avoid; | ||
336 | } | ||
337 | |||
338 | |||
339 | div.informalfigure p.title b, | ||
340 | div.informalexample p.title b, | ||
341 | div.informaltable p.title b, | ||
342 | div.figure p.title b, | ||
343 | div.example p.title b, | ||
344 | div.table p.title b{ | ||
345 | padding-top: 0em; | ||
346 | margin-top: 0em; | ||
347 | font-size: 100%; | ||
348 | font-weight: normal; | ||
349 | } | ||
350 | |||
351 | .mediaobject .caption, | ||
352 | .mediaobject .caption p { | ||
353 | text-align: center; | ||
354 | font-size: 80%; | ||
355 | padding-top: 0.5em; | ||
356 | padding-bottom: 0.5em; | ||
357 | } | ||
358 | |||
359 | .epigraph { | ||
360 | padding-left: 55%; | ||
361 | margin-bottom: 1em; | ||
362 | } | ||
363 | |||
364 | .epigraph p { | ||
365 | text-align: left; | ||
366 | } | ||
367 | |||
368 | .epigraph .quote { | ||
369 | font-style: italic; | ||
370 | } | ||
371 | .epigraph .attribution { | ||
372 | font-style: normal; | ||
373 | text-align: right; | ||
374 | } | ||
375 | |||
376 | span.application { | ||
377 | font-style: italic; | ||
378 | } | ||
379 | |||
380 | .programlisting { | ||
381 | font-family: monospace; | ||
382 | font-size: 80%; | ||
383 | white-space: pre; | ||
384 | margin: 1.33em 0em; | ||
385 | padding: 1.33em; | ||
386 | } | ||
387 | |||
388 | .tip, | ||
389 | .warning, | ||
390 | .caution, | ||
391 | .note { | ||
392 | margin-top: 1em; | ||
393 | margin-bottom: 1em; | ||
394 | |||
395 | } | ||
396 | |||
397 | /* force full width of table within div */ | ||
398 | .tip table, | ||
399 | .warning table, | ||
400 | .caution table, | ||
401 | .note table { | ||
402 | border: none; | ||
403 | width: 100%; | ||
404 | } | ||
405 | |||
406 | |||
407 | .tip table th, | ||
408 | .warning table th, | ||
409 | .caution table th, | ||
410 | .note table th { | ||
411 | padding: 0.8em 0.0em 0.0em 0.0em; | ||
412 | margin : 0em 0em 0em 0em; | ||
413 | } | ||
414 | |||
415 | .tip p, | ||
416 | .warning p, | ||
417 | .caution p, | ||
418 | .note p { | ||
419 | margin-top: 0.5em; | ||
420 | margin-bottom: 0.5em; | ||
421 | padding-right: 1em; | ||
422 | text-align: left; | ||
423 | } | ||
424 | |||
425 | .acronym { | ||
426 | text-transform: uppercase; | ||
427 | } | ||
428 | |||
429 | b.keycap, | ||
430 | .keycap { | ||
431 | padding: 0.09em 0.3em; | ||
432 | margin: 0em; | ||
433 | } | ||
434 | |||
435 | .itemizedlist li { | ||
436 | clear: none; | ||
437 | } | ||
438 | |||
439 | .filename { | ||
440 | font-size: medium; | ||
441 | font-family: Courier, monospace; | ||
442 | } | ||
443 | |||
444 | |||
445 | div.navheader, div.heading{ | ||
446 | position: absolute; | ||
447 | left: 0em; | ||
448 | top: 0em; | ||
449 | width: 100%; | ||
450 | background-color: #cdf; | ||
451 | width: 100%; | ||
452 | } | ||
453 | |||
454 | div.navfooter, div.footing{ | ||
455 | position: fixed; | ||
456 | left: 0em; | ||
457 | bottom: 0em; | ||
458 | background-color: #eee; | ||
459 | width: 100%; | ||
460 | } | ||
461 | |||
462 | |||
463 | div.navheader td, | ||
464 | div.navfooter td { | ||
465 | font-size: 66%; | ||
466 | } | ||
467 | |||
468 | div.navheader table th { | ||
469 | /*font-family: Georgia, Times, serif;*/ | ||
470 | /*font-size: x-large;*/ | ||
471 | font-size: 80%; | ||
472 | } | ||
473 | |||
474 | div.navheader table { | ||
475 | border-left: 0em; | ||
476 | border-right: 0em; | ||
477 | border-top: 0em; | ||
478 | width: 100%; | ||
479 | } | ||
480 | |||
481 | div.navfooter table { | ||
482 | border-left: 0em; | ||
483 | border-right: 0em; | ||
484 | border-bottom: 0em; | ||
485 | width: 100%; | ||
486 | } | ||
487 | |||
488 | div.navheader table td a, | ||
489 | div.navfooter table td a { | ||
490 | color: #777; | ||
491 | text-decoration: none; | ||
492 | } | ||
493 | |||
494 | /* normal text in the footer */ | ||
495 | div.navfooter table td { | ||
496 | color: black; | ||
497 | } | ||
498 | |||
499 | div.navheader table td a:visited, | ||
500 | div.navfooter table td a:visited { | ||
501 | color: #444; | ||
502 | } | ||
503 | |||
504 | |||
505 | /* links in header and footer */ | ||
506 | div.navheader table td a:hover, | ||
507 | div.navfooter table td a:hover { | ||
508 | text-decoration: underline; | ||
509 | background-color: transparent; | ||
510 | color: #33a; | ||
511 | } | ||
512 | |||
513 | div.navheader hr, | ||
514 | div.navfooter hr { | ||
515 | display: none; | ||
516 | } | ||
517 | |||
518 | |||
519 | .qandaset tr.question td p { | ||
520 | margin: 0em 0em 1em 0em; | ||
521 | padding: 0em 0em 0em 0em; | ||
522 | } | ||
523 | |||
524 | .qandaset tr.answer td p { | ||
525 | margin: 0em 0em 1em 0em; | ||
526 | padding: 0em 0em 0em 0em; | ||
527 | } | ||
528 | .answer td { | ||
529 | padding-bottom: 1.5em; | ||
530 | } | ||
531 | |||
532 | .emphasis { | ||
533 | font-weight: bold; | ||
534 | } | ||
535 | |||
536 | |||
537 | /************* / | ||
538 | / decorations / | ||
539 | / *************/ | ||
540 | |||
541 | .titlepage { | ||
542 | } | ||
543 | |||
544 | .part .title { | ||
545 | } | ||
546 | |||
547 | .subtitle { | ||
548 | border: none; | ||
549 | } | ||
550 | |||
551 | /* | ||
552 | h1 { | ||
553 | border: none; | ||
554 | } | ||
555 | |||
556 | h2 { | ||
557 | border-top: solid 0.2em; | ||
558 | border-bottom: solid 0.06em; | ||
559 | } | ||
560 | |||
561 | h3 { | ||
562 | border-top: 0em; | ||
563 | border-bottom: solid 0.06em; | ||
564 | } | ||
565 | |||
566 | h4 { | ||
567 | border: 0em; | ||
568 | border-bottom: solid 0.06em; | ||
569 | } | ||
570 | |||
571 | h5 { | ||
572 | border: 0em; | ||
573 | } | ||
574 | */ | ||
575 | |||
576 | .programlisting { | ||
577 | border: solid 1px; | ||
578 | } | ||
579 | |||
580 | div.figure, | ||
581 | div.table, | ||
582 | div.informalfigure, | ||
583 | div.informaltable, | ||
584 | div.informalexample, | ||
585 | div.example { | ||
586 | border: 1px solid; | ||
587 | } | ||
588 | |||
589 | |||
590 | |||
591 | .tip, | ||
592 | .warning, | ||
593 | .caution, | ||
594 | .note { | ||
595 | border: 1px solid; | ||
596 | } | ||
597 | |||
598 | .tip table th, | ||
599 | .warning table th, | ||
600 | .caution table th, | ||
601 | .note table th { | ||
602 | border-bottom: 1px solid; | ||
603 | } | ||
604 | |||
605 | .question td { | ||
606 | border-top: 1px solid black; | ||
607 | } | ||
608 | |||
609 | .answer { | ||
610 | } | ||
611 | |||
612 | |||
613 | b.keycap, | ||
614 | .keycap { | ||
615 | border: 1px solid; | ||
616 | } | ||
617 | |||
618 | |||
619 | div.navheader, div.heading{ | ||
620 | border-bottom: 1px solid; | ||
621 | } | ||
622 | |||
623 | |||
624 | div.navfooter, div.footing{ | ||
625 | border-top: 1px solid; | ||
626 | } | ||
627 | |||
628 | /********* / | ||
629 | / colors / | ||
630 | / *********/ | ||
631 | |||
632 | body { | ||
633 | color: #333; | ||
634 | background: white; | ||
635 | } | ||
636 | |||
637 | a { | ||
638 | background: transparent; | ||
639 | } | ||
640 | |||
641 | a:hover { | ||
642 | background-color: #dedede; | ||
643 | } | ||
644 | |||
645 | |||
646 | h1, | ||
647 | h2, | ||
648 | h3, | ||
649 | h4, | ||
650 | h5, | ||
651 | h6, | ||
652 | h7, | ||
653 | h8 { | ||
654 | background-color: transparent; | ||
655 | } | ||
656 | |||
657 | hr { | ||
658 | border-color: #aaa; | ||
659 | } | ||
660 | |||
661 | |||
662 | .tip, .warning, .caution, .note { | ||
663 | border-color: #fff; | ||
664 | } | ||
665 | |||
666 | |||
667 | .tip table th, | ||
668 | .warning table th, | ||
669 | .caution table th, | ||
670 | .note table th { | ||
671 | border-bottom-color: #fff; | ||
672 | } | ||
673 | |||
674 | |||
675 | .warning { | ||
676 | background-color: #f0f0f2; | ||
677 | } | ||
678 | |||
679 | .caution { | ||
680 | background-color: #f0f0f2; | ||
681 | } | ||
682 | |||
683 | .tip { | ||
684 | background-color: #f0f0f2; | ||
685 | } | ||
686 | |||
687 | .note { | ||
688 | background-color: #f0f0f2; | ||
689 | } | ||
690 | |||
691 | .glossary dl dt, | ||
692 | .variablelist dl dt, | ||
693 | .variablelist dl dt span.term { | ||
694 | color: #044; | ||
695 | } | ||
696 | |||
697 | div.figure, | ||
698 | div.table, | ||
699 | div.example, | ||
700 | div.informalfigure, | ||
701 | div.informaltable, | ||
702 | div.informalexample { | ||
703 | border-color: #aaa; | ||
704 | } | ||
705 | |||
706 | pre.programlisting { | ||
707 | color: black; | ||
708 | background-color: #fff; | ||
709 | border-color: #aaa; | ||
710 | border-width: 2px; | ||
711 | } | ||
712 | |||
713 | .guimenu, | ||
714 | .guilabel, | ||
715 | .guimenuitem { | ||
716 | background-color: #eee; | ||
717 | } | ||
718 | |||
719 | |||
720 | b.keycap, | ||
721 | .keycap { | ||
722 | background-color: #eee; | ||
723 | border-color: #999; | ||
724 | } | ||
725 | |||
726 | |||
727 | div.navheader { | ||
728 | border-color: black; | ||
729 | } | ||
730 | |||
731 | |||
732 | div.navfooter { | ||
733 | border-color: black; | ||
734 | } | ||
735 | |||
736 | |||
737 | /*********** / | ||
738 | / graphics / | ||
739 | / ***********/ | ||
740 | |||
741 | /* | ||
742 | body { | ||
743 | background-image: url("images/body_bg.jpg"); | ||
744 | background-attachment: fixed; | ||
745 | } | ||
746 | |||
747 | .navheader, | ||
748 | .note, | ||
749 | .tip { | ||
750 | background-image: url("images/note_bg.jpg"); | ||
751 | background-attachment: fixed; | ||
752 | } | ||
753 | |||
754 | .warning, | ||
755 | .caution { | ||
756 | background-image: url("images/warning_bg.jpg"); | ||
757 | background-attachment: fixed; | ||
758 | } | ||
759 | |||
760 | .figure, | ||
761 | .informalfigure, | ||
762 | .example, | ||
763 | .informalexample, | ||
764 | .table, | ||
765 | .informaltable { | ||
766 | background-image: url("images/figure_bg.jpg"); | ||
767 | background-attachment: fixed; | ||
768 | } | ||
769 | |||
770 | */ | ||
771 | h1, | ||
772 | h2, | ||
773 | h3, | ||
774 | h4, | ||
775 | h5, | ||
776 | h6, | ||
777 | h7{ | ||
778 | } | ||
779 | |||
780 | /* | ||
781 | Example of how to stick an image as part of the title. | ||
782 | |||
783 | div.article .titlepage .title | ||
784 | { | ||
785 | background-image: url("figures/white-on-black.png"); | ||
786 | background-position: center; | ||
787 | background-repeat: repeat-x; | ||
788 | } | ||
789 | */ | ||
790 | |||
791 | div.preface .titlepage .title, | ||
792 | div.colophon .title, | ||
793 | div.chapter .titlepage .title, | ||
794 | div.article .titlepage .title | ||
795 | { | ||
796 | } | ||
797 | |||
798 | div.section div.section .titlepage .title, | ||
799 | div.sect2 .titlepage .title { | ||
800 | background: none; | ||
801 | } | ||
802 | |||
803 | |||
804 | h1.title { | ||
805 | background-color: transparent; | ||
806 | background-repeat: no-repeat; | ||
807 | height: 256px; | ||
808 | text-indent: -9000px; | ||
809 | overflow:hidden; | ||
810 | } | ||
811 | |||
812 | h2.subtitle { | ||
813 | background-color: transparent; | ||
814 | text-indent: -9000px; | ||
815 | overflow:hidden; | ||
816 | width: 0px; | ||
817 | display: none; | ||
818 | } | ||
819 | |||
820 | /*************************************** / | ||
821 | / pippin.gimp.org specific alterations / | ||
822 | / ***************************************/ | ||
823 | |||
824 | /* | ||
825 | div.heading, div.navheader { | ||
826 | color: #777; | ||
827 | font-size: 80%; | ||
828 | padding: 0; | ||
829 | margin: 0; | ||
830 | text-align: left; | ||
831 | position: absolute; | ||
832 | top: 0px; | ||
833 | left: 0px; | ||
834 | width: 100%; | ||
835 | height: 50px; | ||
836 | background: url('/gfx/heading_bg.png') transparent; | ||
837 | background-repeat: repeat-x; | ||
838 | background-attachment: fixed; | ||
839 | border: none; | ||
840 | } | ||
841 | |||
842 | div.heading a { | ||
843 | color: #444; | ||
844 | } | ||
845 | |||
846 | div.footing, div.navfooter { | ||
847 | border: none; | ||
848 | color: #ddd; | ||
849 | font-size: 80%; | ||
850 | text-align:right; | ||
851 | |||
852 | width: 100%; | ||
853 | padding-top: 10px; | ||
854 | position: absolute; | ||
855 | bottom: 0px; | ||
856 | left: 0px; | ||
857 | |||
858 | background: url('/gfx/footing_bg.png') transparent; | ||
859 | } | ||
860 | */ | ||
861 | |||
862 | |||
863 | |||
864 | /****************** / | ||
865 | / nasty ie tweaks / | ||
866 | / ******************/ | ||
867 | |||
868 | /* | ||
869 | div.heading, div.navheader { | ||
870 | width:expression(document.body.clientWidth + "px"); | ||
871 | } | ||
872 | |||
873 | div.footing, div.navfooter { | ||
874 | width:expression(document.body.clientWidth + "px"); | ||
875 | margin-left:expression("-5em"); | ||
876 | } | ||
877 | body { | ||
878 | padding:expression("4em 5em 0em 5em"); | ||
879 | } | ||
880 | */ | ||
881 | |||
882 | /**************************************** / | ||
883 | / mozilla vendor specific css extensions / | ||
884 | / ****************************************/ | ||
885 | /* | ||
886 | div.navfooter, div.footing{ | ||
887 | -moz-opacity: 0.8em; | ||
888 | } | ||
889 | |||
890 | div.figure, | ||
891 | div.table, | ||
892 | div.informalfigure, | ||
893 | div.informaltable, | ||
894 | div.informalexample, | ||
895 | div.example, | ||
896 | .tip, | ||
897 | .warning, | ||
898 | .caution, | ||
899 | .note { | ||
900 | -moz-border-radius: 0.5em; | ||
901 | } | ||
902 | |||
903 | b.keycap, | ||
904 | .keycap { | ||
905 | -moz-border-radius: 0.3em; | ||
906 | } | ||
907 | */ | ||
908 | |||
909 | table tr td table tr td { | ||
910 | display: none; | ||
911 | } | ||
912 | |||
913 | |||
914 | hr { | ||
915 | display: none; | ||
916 | } | ||
917 | |||
918 | table { | ||
919 | border: 0em; | ||
920 | } | ||
921 | |||
922 | .photo { | ||
923 | float: right; | ||
924 | margin-left: 1.5em; | ||
925 | margin-bottom: 1.5em; | ||
926 | margin-top: 0em; | ||
927 | max-width: 17em; | ||
928 | border: 1px solid gray; | ||
929 | padding: 3px; | ||
930 | background: white; | ||
931 | } | ||
932 | .seperator { | ||
933 | padding-top: 2em; | ||
934 | clear: both; | ||
935 | } | ||
936 | |||
937 | #validators { | ||
938 | margin-top: 5em; | ||
939 | text-align: right; | ||
940 | color: #777; | ||
941 | } | ||
942 | @media print { | ||
943 | body { | ||
944 | font-size: 8pt; | ||
945 | } | ||
946 | .noprint { | ||
947 | display: none; | ||
948 | } | ||
949 | } | ||
950 | |||
951 | |||
952 | .tip, | ||
953 | .note { | ||
954 | background: #f0f0f2; | ||
955 | color: #333; | ||
956 | padding: 20px; | ||
957 | margin: 20px; | ||
958 | } | ||
959 | |||
960 | .tip h3, | ||
961 | .note h3 { | ||
962 | padding: 0em; | ||
963 | margin: 0em; | ||
964 | font-size: 2em; | ||
965 | font-weight: bold; | ||
966 | color: #333; | ||
967 | } | ||
968 | |||
969 | .tip a, | ||
970 | .note a { | ||
971 | color: #333; | ||
972 | text-decoration: underline; | ||
973 | } | ||
974 | |||
975 | .footnote { | ||
976 | font-size: small; | ||
977 | color: #333; | ||
978 | } | ||
979 | |||
980 | /* Changes the announcement text */ | ||
981 | .tip h3, | ||
982 | .warning h3, | ||
983 | .caution h3, | ||
984 | .note h3 { | ||
985 | font-size:large; | ||
986 | color: #00557D; | ||
987 | } | ||
diff --git a/documentation/profile-manual/profile-manual-usage.xml b/documentation/profile-manual/profile-manual-usage.xml deleted file mode 100644 index 3a7148cbd3..0000000000 --- a/documentation/profile-manual/profile-manual-usage.xml +++ /dev/null | |||
@@ -1,2986 +0,0 @@ | |||
1 | <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
2 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" | ||
3 | [<!ENTITY % poky SYSTEM "../poky.ent"> %poky; ] > | ||
4 | <!--SPDX-License-Identifier: CC-BY-2.0-UK--> | ||
5 | |||
6 | <chapter id='profile-manual-usage'> | ||
7 | |||
8 | <title>Basic Usage (with examples) for each of the Yocto Tracing Tools</title> | ||
9 | |||
10 | <para> | ||
11 | This chapter presents basic usage examples for each of the tracing | ||
12 | tools. | ||
13 | </para> | ||
14 | |||
15 | <section id='profile-manual-perf'> | ||
16 | <title>perf</title> | ||
17 | |||
18 | <para> | ||
19 | The 'perf' tool is the profiling and tracing tool that comes | ||
20 | bundled with the Linux kernel. | ||
21 | </para> | ||
22 | |||
23 | <para> | ||
24 | Don't let the fact that it's part of the kernel fool you into thinking | ||
25 | that it's only for tracing and profiling the kernel - you can indeed | ||
26 | use it to trace and profile just the kernel, but you can also use it | ||
27 | to profile specific applications separately (with or without kernel | ||
28 | context), and you can also use it to trace and profile the kernel | ||
29 | and all applications on the system simultaneously to gain a system-wide | ||
30 | view of what's going on. | ||
31 | </para> | ||
32 | |||
33 | <para> | ||
34 | In many ways, perf aims to be a superset of all the tracing and profiling | ||
35 | tools available in Linux today, including all the other tools covered | ||
36 | in this HOWTO. The past couple of years have seen perf subsume a lot | ||
37 | of the functionality of those other tools and, at the same time, those | ||
38 | other tools have removed large portions of their previous functionality | ||
39 | and replaced it with calls to the equivalent functionality now | ||
40 | implemented by the perf subsystem. Extrapolation suggests that at | ||
41 | some point those other tools will simply become completely redundant | ||
42 | and go away; until then, we'll cover those other tools in these pages | ||
43 | and in many cases show how the same things can be accomplished in | ||
44 | perf and the other tools when it seems useful to do so. | ||
45 | </para> | ||
46 | |||
47 | <para> | ||
48 | The coverage below details some of the most common ways you'll likely | ||
49 | want to apply the tool; full documentation can be found either within | ||
50 | the tool itself or in the man pages at | ||
51 | <ulink url='http://linux.die.net/man/1/perf'>perf(1)</ulink>. | ||
52 | </para> | ||
53 | |||
54 | <section id='perf-setup'> | ||
55 | <title>Setup</title> | ||
56 | |||
57 | <para> | ||
58 | For this section, we'll assume you've already performed the basic | ||
59 | setup outlined in the General Setup section. | ||
60 | </para> | ||
61 | |||
62 | <para> | ||
63 | In particular, you'll get the most mileage out of perf if you | ||
64 | profile an image built with the following in your | ||
65 | <filename>local.conf</filename> file: | ||
66 | <literallayout class='monospaced'> | ||
67 | <ulink url='&YOCTO_DOCS_REF_URL;#var-INHIBIT_PACKAGE_STRIP'>INHIBIT_PACKAGE_STRIP</ulink> = "1" | ||
68 | </literallayout> | ||
69 | </para> | ||
70 | |||
71 | <para> | ||
72 | perf runs on the target system for the most part. You can archive | ||
73 | profile data and copy it to the host for analysis, but for the | ||
74 | rest of this document we assume you've ssh'ed to the host and | ||
75 | will be running the perf commands on the target. | ||
76 | </para> | ||
77 | </section> | ||
78 | |||
79 | <section id='perf-basic-usage'> | ||
80 | <title>Basic Usage</title> | ||
81 | |||
82 | <para> | ||
83 | The perf tool is pretty much self-documenting. To remind yourself | ||
84 | of the available commands, simply type 'perf', which will show you | ||
85 | basic usage along with the available perf subcommands: | ||
86 | <literallayout class='monospaced'> | ||
87 | root@crownbay:~# perf | ||
88 | |||
89 | usage: perf [--version] [--help] COMMAND [ARGS] | ||
90 | |||
91 | The most commonly used perf commands are: | ||
92 | annotate Read perf.data (created by perf record) and display annotated code | ||
93 | archive Create archive with object files with build-ids found in perf.data file | ||
94 | bench General framework for benchmark suites | ||
95 | buildid-cache Manage build-id cache. | ||
96 | buildid-list List the buildids in a perf.data file | ||
97 | diff Read two perf.data files and display the differential profile | ||
98 | evlist List the event names in a perf.data file | ||
99 | inject Filter to augment the events stream with additional information | ||
100 | kmem Tool to trace/measure kernel memory(slab) properties | ||
101 | kvm Tool to trace/measure kvm guest os | ||
102 | list List all symbolic event types | ||
103 | lock Analyze lock events | ||
104 | probe Define new dynamic tracepoints | ||
105 | record Run a command and record its profile into perf.data | ||
106 | report Read perf.data (created by perf record) and display the profile | ||
107 | sched Tool to trace/measure scheduler properties (latencies) | ||
108 | script Read perf.data (created by perf record) and display trace output | ||
109 | stat Run a command and gather performance counter statistics | ||
110 | test Runs sanity tests. | ||
111 | timechart Tool to visualize total system behavior during a workload | ||
112 | top System profiling tool. | ||
113 | |||
114 | See 'perf help COMMAND' for more information on a specific command. | ||
115 | </literallayout> | ||
116 | </para> | ||
117 | |||
118 | <section id='using-perf-to-do-basic-profiling'> | ||
119 | <title>Using perf to do Basic Profiling</title> | ||
120 | |||
121 | <para> | ||
122 | As a simple test case, we'll profile the 'wget' of a fairly large | ||
123 | file, which is a minimally interesting case because it has both | ||
124 | file and network I/O aspects, and at least in the case of standard | ||
125 | Yocto images, it's implemented as part of busybox, so the methods | ||
126 | we use to analyze it can be used in a very similar way to the whole | ||
127 | host of supported busybox applets in Yocto. | ||
128 | <literallayout class='monospaced'> | ||
129 | root@crownbay:~# rm linux-2.6.19.2.tar.bz2; \ | ||
130 | wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
131 | </literallayout> | ||
132 | The quickest and easiest way to get some basic overall data about | ||
133 | what's going on for a particular workload is to profile it using | ||
134 | 'perf stat'. 'perf stat' basically profiles using a few default | ||
135 | counters and displays the summed counts at the end of the run: | ||
136 | <literallayout class='monospaced'> | ||
137 | root@crownbay:~# perf stat wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
138 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
139 | linux-2.6.19.2.tar.b 100% |***************************************************| 41727k 0:00:00 ETA | ||
140 | |||
141 | Performance counter stats for 'wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink>': | ||
142 | |||
143 | 4597.223902 task-clock # 0.077 CPUs utilized | ||
144 | 23568 context-switches # 0.005 M/sec | ||
145 | 68 CPU-migrations # 0.015 K/sec | ||
146 | 241 page-faults # 0.052 K/sec | ||
147 | 3045817293 cycles # 0.663 GHz | ||
148 | <not supported> stalled-cycles-frontend | ||
149 | <not supported> stalled-cycles-backend | ||
150 | 858909167 instructions # 0.28 insns per cycle | ||
151 | 165441165 branches # 35.987 M/sec | ||
152 | 19550329 branch-misses # 11.82% of all branches | ||
153 | |||
154 | 59.836627620 seconds time elapsed | ||
155 | </literallayout> | ||
156 | Many times such a simple-minded test doesn't yield much of | ||
157 | interest, but sometimes it does (see Real-world Yocto bug | ||
158 | (slow loop-mounted write speed)). | ||
159 | </para> | ||
160 | |||
161 | <para> | ||
162 | Also, note that 'perf stat' isn't restricted to a fixed set of | ||
163 | counters - basically any event listed in the output of 'perf list' | ||
164 | can be tallied by 'perf stat'. For example, suppose we wanted to | ||
165 | see a summary of all the events related to kernel memory | ||
166 | allocation/freeing along with cache hits and misses: | ||
167 | <literallayout class='monospaced'> | ||
168 | root@crownbay:~# perf stat -e kmem:* -e cache-references -e cache-misses wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
169 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
170 | linux-2.6.19.2.tar.b 100% |***************************************************| 41727k 0:00:00 ETA | ||
171 | |||
172 | Performance counter stats for 'wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink>': | ||
173 | |||
174 | 5566 kmem:kmalloc | ||
175 | 125517 kmem:kmem_cache_alloc | ||
176 | 0 kmem:kmalloc_node | ||
177 | 0 kmem:kmem_cache_alloc_node | ||
178 | 34401 kmem:kfree | ||
179 | 69920 kmem:kmem_cache_free | ||
180 | 133 kmem:mm_page_free | ||
181 | 41 kmem:mm_page_free_batched | ||
182 | 11502 kmem:mm_page_alloc | ||
183 | 11375 kmem:mm_page_alloc_zone_locked | ||
184 | 0 kmem:mm_page_pcpu_drain | ||
185 | 0 kmem:mm_page_alloc_extfrag | ||
186 | 66848602 cache-references | ||
187 | 2917740 cache-misses # 4.365 % of all cache refs | ||
188 | |||
189 | 44.831023415 seconds time elapsed | ||
190 | </literallayout> | ||
191 | So 'perf stat' gives us a nice easy way to get a quick overview of | ||
192 | what might be happening for a set of events, but normally we'd | ||
193 | need a little more detail in order to understand what's going on | ||
194 | in a way that we can act on in a useful way. | ||
195 | </para> | ||
196 | |||
197 | <para> | ||
198 | To dive down into a next level of detail, we can use 'perf | ||
199 | record'/'perf report' which will collect profiling data and | ||
200 | present it to use using an interactive text-based UI (or | ||
201 | simply as text if we specify --stdio to 'perf report'). | ||
202 | </para> | ||
203 | |||
204 | <para> | ||
205 | As our first attempt at profiling this workload, we'll simply | ||
206 | run 'perf record', handing it the workload we want to profile | ||
207 | (everything after 'perf record' and any perf options we hand | ||
208 | it - here none - will be executed in a new shell). perf collects | ||
209 | samples until the process exits and records them in a file named | ||
210 | 'perf.data' in the current working directory. | ||
211 | <literallayout class='monospaced'> | ||
212 | root@crownbay:~# perf record wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
213 | |||
214 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
215 | linux-2.6.19.2.tar.b 100% |************************************************| 41727k 0:00:00 ETA | ||
216 | [ perf record: Woken up 1 times to write data ] | ||
217 | [ perf record: Captured and wrote 0.176 MB perf.data (~7700 samples) ] | ||
218 | </literallayout> | ||
219 | To see the results in a 'text-based UI' (tui), simply run | ||
220 | 'perf report', which will read the perf.data file in the current | ||
221 | working directory and display the results in an interactive UI: | ||
222 | <literallayout class='monospaced'> | ||
223 | root@crownbay:~# perf report | ||
224 | </literallayout> | ||
225 | </para> | ||
226 | |||
227 | <para> | ||
228 | <imagedata fileref="figures/perf-wget-flat-stripped.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
229 | </para> | ||
230 | |||
231 | <para> | ||
232 | The above screenshot displays a 'flat' profile, one entry for | ||
233 | each 'bucket' corresponding to the functions that were profiled | ||
234 | during the profiling run, ordered from the most popular to the | ||
235 | least (perf has options to sort in various orders and keys as | ||
236 | well as display entries only above a certain threshold and so | ||
237 | on - see the perf documentation for details). Note that this | ||
238 | includes both userspace functions (entries containing a [.]) and | ||
239 | kernel functions accounted to the process (entries containing | ||
240 | a [k]). (perf has command-line modifiers that can be used to | ||
241 | restrict the profiling to kernel or userspace, among others). | ||
242 | </para> | ||
243 | |||
244 | <para> | ||
245 | Notice also that the above report shows an entry for 'busybox', | ||
246 | which is the executable that implements 'wget' in Yocto, but that | ||
247 | instead of a useful function name in that entry, it displays | ||
248 | a not-so-friendly hex value instead. The steps below will show | ||
249 | how to fix that problem. | ||
250 | </para> | ||
251 | |||
252 | <para> | ||
253 | Before we do that, however, let's try running a different profile, | ||
254 | one which shows something a little more interesting. The only | ||
255 | difference between the new profile and the previous one is that | ||
256 | we'll add the -g option, which will record not just the address | ||
257 | of a sampled function, but the entire callchain to the sampled | ||
258 | function as well: | ||
259 | <literallayout class='monospaced'> | ||
260 | root@crownbay:~# perf record -g wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
261 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
262 | linux-2.6.19.2.tar.b 100% |************************************************| 41727k 0:00:00 ETA | ||
263 | [ perf record: Woken up 3 times to write data ] | ||
264 | [ perf record: Captured and wrote 0.652 MB perf.data (~28476 samples) ] | ||
265 | |||
266 | |||
267 | root@crownbay:~# perf report | ||
268 | </literallayout> | ||
269 | </para> | ||
270 | |||
271 | <para> | ||
272 | <imagedata fileref="figures/perf-wget-g-copy-to-user-expanded-stripped.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
273 | </para> | ||
274 | |||
275 | <para> | ||
276 | Using the callgraph view, we can actually see not only which | ||
277 | functions took the most time, but we can also see a summary of | ||
278 | how those functions were called and learn something about how the | ||
279 | program interacts with the kernel in the process. | ||
280 | </para> | ||
281 | |||
282 | <para> | ||
283 | Notice that each entry in the above screenshot now contains a '+' | ||
284 | on the left-hand side. This means that we can expand the entry and | ||
285 | drill down into the callchains that feed into that entry. | ||
286 | Pressing 'enter' on any one of them will expand the callchain | ||
287 | (you can also press 'E' to expand them all at the same time or 'C' | ||
288 | to collapse them all). | ||
289 | </para> | ||
290 | |||
291 | <para> | ||
292 | In the screenshot above, we've toggled the __copy_to_user_ll() | ||
293 | entry and several subnodes all the way down. This lets us see | ||
294 | which callchains contributed to the profiled __copy_to_user_ll() | ||
295 | function which contributed 1.77% to the total profile. | ||
296 | </para> | ||
297 | |||
298 | <para> | ||
299 | As a bit of background explanation for these callchains, think | ||
300 | about what happens at a high level when you run wget to get a file | ||
301 | out on the network. Basically what happens is that the data comes | ||
302 | into the kernel via the network connection (socket) and is passed | ||
303 | to the userspace program 'wget' (which is actually a part of | ||
304 | busybox, but that's not important for now), which takes the buffers | ||
305 | the kernel passes to it and writes it to a disk file to save it. | ||
306 | </para> | ||
307 | |||
308 | <para> | ||
309 | The part of this process that we're looking at in the above call | ||
310 | stacks is the part where the kernel passes the data it's read from | ||
311 | the socket down to wget i.e. a copy-to-user. | ||
312 | </para> | ||
313 | |||
314 | <para> | ||
315 | Notice also that here there's also a case where the hex value | ||
316 | is displayed in the callstack, here in the expanded | ||
317 | sys_clock_gettime() function. Later we'll see it resolve to a | ||
318 | userspace function call in busybox. | ||
319 | </para> | ||
320 | |||
321 | <para> | ||
322 | <imagedata fileref="figures/perf-wget-g-copy-from-user-expanded-stripped.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
323 | </para> | ||
324 | |||
325 | <para> | ||
326 | The above screenshot shows the other half of the journey for the | ||
327 | data - from the wget program's userspace buffers to disk. To get | ||
328 | the buffers to disk, the wget program issues a write(2), which | ||
329 | does a copy-from-user to the kernel, which then takes care via | ||
330 | some circuitous path (probably also present somewhere in the | ||
331 | profile data), to get it safely to disk. | ||
332 | </para> | ||
333 | |||
334 | <para> | ||
335 | Now that we've seen the basic layout of the profile data and the | ||
336 | basics of how to extract useful information out of it, let's get | ||
337 | back to the task at hand and see if we can get some basic idea | ||
338 | about where the time is spent in the program we're profiling, | ||
339 | wget. Remember that wget is actually implemented as an applet | ||
340 | in busybox, so while the process name is 'wget', the executable | ||
341 | we're actually interested in is busybox. So let's expand the | ||
342 | first entry containing busybox: | ||
343 | </para> | ||
344 | |||
345 | <para> | ||
346 | <imagedata fileref="figures/perf-wget-busybox-expanded-stripped.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
347 | </para> | ||
348 | |||
349 | <para> | ||
350 | Again, before we expanded we saw that the function was labeled | ||
351 | with a hex value instead of a symbol as with most of the kernel | ||
352 | entries. Expanding the busybox entry doesn't make it any better. | ||
353 | </para> | ||
354 | |||
355 | <para> | ||
356 | The problem is that perf can't find the symbol information for the | ||
357 | busybox binary, which is actually stripped out by the Yocto build | ||
358 | system. | ||
359 | </para> | ||
360 | |||
361 | <para> | ||
362 | One way around that is to put the following in your | ||
363 | <filename>local.conf</filename> file when you build the image: | ||
364 | <literallayout class='monospaced'> | ||
365 | <ulink url='&YOCTO_DOCS_REF_URL;#var-INHIBIT_PACKAGE_STRIP'>INHIBIT_PACKAGE_STRIP</ulink> = "1" | ||
366 | </literallayout> | ||
367 | However, we already have an image with the binaries stripped, | ||
368 | so what can we do to get perf to resolve the symbols? Basically | ||
369 | we need to install the debuginfo for the busybox package. | ||
370 | </para> | ||
371 | |||
372 | <para> | ||
373 | To generate the debug info for the packages in the image, we can | ||
374 | add dbg-pkgs to EXTRA_IMAGE_FEATURES in local.conf. For example: | ||
375 | <literallayout class='monospaced'> | ||
376 | EXTRA_IMAGE_FEATURES = "debug-tweaks tools-profile dbg-pkgs" | ||
377 | </literallayout> | ||
378 | Additionally, in order to generate the type of debuginfo that | ||
379 | perf understands, we also need to set | ||
380 | <ulink url='&YOCTO_DOCS_REF_URL;#var-PACKAGE_DEBUG_SPLIT_STYLE'><filename>PACKAGE_DEBUG_SPLIT_STYLE</filename></ulink> | ||
381 | in the <filename>local.conf</filename> file: | ||
382 | <literallayout class='monospaced'> | ||
383 | PACKAGE_DEBUG_SPLIT_STYLE = 'debug-file-directory' | ||
384 | </literallayout> | ||
385 | Once we've done that, we can install the debuginfo for busybox. | ||
386 | The debug packages once built can be found in | ||
387 | build/tmp/deploy/rpm/* on the host system. Find the | ||
388 | busybox-dbg-...rpm file and copy it to the target. For example: | ||
389 | <literallayout class='monospaced'> | ||
390 | [trz@empanada core2]$ scp /home/trz/yocto/crownbay-tracing-dbg/build/tmp/deploy/rpm/core2_32/busybox-dbg-1.20.2-r2.core2_32.rpm root@192.168.1.31: | ||
391 | root@192.168.1.31's password: | ||
392 | busybox-dbg-1.20.2-r2.core2_32.rpm 100% 1826KB 1.8MB/s 00:01 | ||
393 | </literallayout> | ||
394 | Now install the debug rpm on the target: | ||
395 | <literallayout class='monospaced'> | ||
396 | root@crownbay:~# rpm -i busybox-dbg-1.20.2-r2.core2_32.rpm | ||
397 | </literallayout> | ||
398 | Now that the debuginfo is installed, we see that the busybox | ||
399 | entries now display their functions symbolically: | ||
400 | </para> | ||
401 | |||
402 | <para> | ||
403 | <imagedata fileref="figures/perf-wget-busybox-debuginfo.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
404 | </para> | ||
405 | |||
406 | <para> | ||
407 | If we expand one of the entries and press 'enter' on a leaf node, | ||
408 | we're presented with a menu of actions we can take to get more | ||
409 | information related to that entry: | ||
410 | </para> | ||
411 | |||
412 | <para> | ||
413 | <imagedata fileref="figures/perf-wget-busybox-dso-zoom-menu.png" width="6in" depth="2in" align="center" scalefit="1" /> | ||
414 | </para> | ||
415 | |||
416 | <para> | ||
417 | One of these actions allows us to show a view that displays a | ||
418 | busybox-centric view of the profiled functions (in this case we've | ||
419 | also expanded all the nodes using the 'E' key): | ||
420 | </para> | ||
421 | |||
422 | <para> | ||
423 | <imagedata fileref="figures/perf-wget-busybox-dso-zoom.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
424 | </para> | ||
425 | |||
426 | <para> | ||
427 | Finally, we can see that now that the busybox debuginfo is | ||
428 | installed, the previously unresolved symbol in the | ||
429 | sys_clock_gettime() entry mentioned previously is now resolved, | ||
430 | and shows that the sys_clock_gettime system call that was the | ||
431 | source of 6.75% of the copy-to-user overhead was initiated by | ||
432 | the handle_input() busybox function: | ||
433 | </para> | ||
434 | |||
435 | <para> | ||
436 | <imagedata fileref="figures/perf-wget-g-copy-to-user-expanded-debuginfo.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
437 | </para> | ||
438 | |||
439 | <para> | ||
440 | At the lowest level of detail, we can dive down to the assembly | ||
441 | level and see which instructions caused the most overhead in a | ||
442 | function. Pressing 'enter' on the 'udhcpc_main' function, we're | ||
443 | again presented with a menu: | ||
444 | </para> | ||
445 | |||
446 | <para> | ||
447 | <imagedata fileref="figures/perf-wget-busybox-annotate-menu.png" width="6in" depth="2in" align="center" scalefit="1" /> | ||
448 | </para> | ||
449 | |||
450 | <para> | ||
451 | Selecting 'Annotate udhcpc_main', we get a detailed listing of | ||
452 | percentages by instruction for the udhcpc_main function. From the | ||
453 | display, we can see that over 50% of the time spent in this | ||
454 | function is taken up by a couple tests and the move of a | ||
455 | constant (1) to a register: | ||
456 | </para> | ||
457 | |||
458 | <para> | ||
459 | <imagedata fileref="figures/perf-wget-busybox-annotate-udhcpc.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
460 | </para> | ||
461 | |||
462 | <para> | ||
463 | As a segue into tracing, let's try another profile using a | ||
464 | different counter, something other than the default 'cycles'. | ||
465 | </para> | ||
466 | |||
467 | <para> | ||
468 | The tracing and profiling infrastructure in Linux has become | ||
469 | unified in a way that allows us to use the same tool with a | ||
470 | completely different set of counters, not just the standard | ||
471 | hardware counters that traditional tools have had to restrict | ||
472 | themselves to (of course the traditional tools can also make use | ||
473 | of the expanded possibilities now available to them, and in some | ||
474 | cases have, as mentioned previously). | ||
475 | </para> | ||
476 | |||
477 | <para> | ||
478 | We can get a list of the available events that can be used to | ||
479 | profile a workload via 'perf list': | ||
480 | <literallayout class='monospaced'> | ||
481 | root@crownbay:~# perf list | ||
482 | |||
483 | List of pre-defined events (to be used in -e): | ||
484 | cpu-cycles OR cycles [Hardware event] | ||
485 | stalled-cycles-frontend OR idle-cycles-frontend [Hardware event] | ||
486 | stalled-cycles-backend OR idle-cycles-backend [Hardware event] | ||
487 | instructions [Hardware event] | ||
488 | cache-references [Hardware event] | ||
489 | cache-misses [Hardware event] | ||
490 | branch-instructions OR branches [Hardware event] | ||
491 | branch-misses [Hardware event] | ||
492 | bus-cycles [Hardware event] | ||
493 | ref-cycles [Hardware event] | ||
494 | |||
495 | cpu-clock [Software event] | ||
496 | task-clock [Software event] | ||
497 | page-faults OR faults [Software event] | ||
498 | minor-faults [Software event] | ||
499 | major-faults [Software event] | ||
500 | context-switches OR cs [Software event] | ||
501 | cpu-migrations OR migrations [Software event] | ||
502 | alignment-faults [Software event] | ||
503 | emulation-faults [Software event] | ||
504 | |||
505 | L1-dcache-loads [Hardware cache event] | ||
506 | L1-dcache-load-misses [Hardware cache event] | ||
507 | L1-dcache-prefetch-misses [Hardware cache event] | ||
508 | L1-icache-loads [Hardware cache event] | ||
509 | L1-icache-load-misses [Hardware cache event] | ||
510 | . | ||
511 | . | ||
512 | . | ||
513 | rNNN [Raw hardware event descriptor] | ||
514 | cpu/t1=v1[,t2=v2,t3 ...]/modifier [Raw hardware event descriptor] | ||
515 | (see 'perf list --help' on how to encode it) | ||
516 | |||
517 | mem:<addr>[:access] [Hardware breakpoint] | ||
518 | |||
519 | sunrpc:rpc_call_status [Tracepoint event] | ||
520 | sunrpc:rpc_bind_status [Tracepoint event] | ||
521 | sunrpc:rpc_connect_status [Tracepoint event] | ||
522 | sunrpc:rpc_task_begin [Tracepoint event] | ||
523 | skb:kfree_skb [Tracepoint event] | ||
524 | skb:consume_skb [Tracepoint event] | ||
525 | skb:skb_copy_datagram_iovec [Tracepoint event] | ||
526 | net:net_dev_xmit [Tracepoint event] | ||
527 | net:net_dev_queue [Tracepoint event] | ||
528 | net:netif_receive_skb [Tracepoint event] | ||
529 | net:netif_rx [Tracepoint event] | ||
530 | napi:napi_poll [Tracepoint event] | ||
531 | sock:sock_rcvqueue_full [Tracepoint event] | ||
532 | sock:sock_exceed_buf_limit [Tracepoint event] | ||
533 | udp:udp_fail_queue_rcv_skb [Tracepoint event] | ||
534 | hda:hda_send_cmd [Tracepoint event] | ||
535 | hda:hda_get_response [Tracepoint event] | ||
536 | hda:hda_bus_reset [Tracepoint event] | ||
537 | scsi:scsi_dispatch_cmd_start [Tracepoint event] | ||
538 | scsi:scsi_dispatch_cmd_error [Tracepoint event] | ||
539 | scsi:scsi_eh_wakeup [Tracepoint event] | ||
540 | drm:drm_vblank_event [Tracepoint event] | ||
541 | drm:drm_vblank_event_queued [Tracepoint event] | ||
542 | drm:drm_vblank_event_delivered [Tracepoint event] | ||
543 | random:mix_pool_bytes [Tracepoint event] | ||
544 | random:mix_pool_bytes_nolock [Tracepoint event] | ||
545 | random:credit_entropy_bits [Tracepoint event] | ||
546 | gpio:gpio_direction [Tracepoint event] | ||
547 | gpio:gpio_value [Tracepoint event] | ||
548 | block:block_rq_abort [Tracepoint event] | ||
549 | block:block_rq_requeue [Tracepoint event] | ||
550 | block:block_rq_issue [Tracepoint event] | ||
551 | block:block_bio_bounce [Tracepoint event] | ||
552 | block:block_bio_complete [Tracepoint event] | ||
553 | block:block_bio_backmerge [Tracepoint event] | ||
554 | . | ||
555 | . | ||
556 | writeback:writeback_wake_thread [Tracepoint event] | ||
557 | writeback:writeback_wake_forker_thread [Tracepoint event] | ||
558 | writeback:writeback_bdi_register [Tracepoint event] | ||
559 | . | ||
560 | . | ||
561 | writeback:writeback_single_inode_requeue [Tracepoint event] | ||
562 | writeback:writeback_single_inode [Tracepoint event] | ||
563 | kmem:kmalloc [Tracepoint event] | ||
564 | kmem:kmem_cache_alloc [Tracepoint event] | ||
565 | kmem:mm_page_alloc [Tracepoint event] | ||
566 | kmem:mm_page_alloc_zone_locked [Tracepoint event] | ||
567 | kmem:mm_page_pcpu_drain [Tracepoint event] | ||
568 | kmem:mm_page_alloc_extfrag [Tracepoint event] | ||
569 | vmscan:mm_vmscan_kswapd_sleep [Tracepoint event] | ||
570 | vmscan:mm_vmscan_kswapd_wake [Tracepoint event] | ||
571 | vmscan:mm_vmscan_wakeup_kswapd [Tracepoint event] | ||
572 | vmscan:mm_vmscan_direct_reclaim_begin [Tracepoint event] | ||
573 | . | ||
574 | . | ||
575 | module:module_get [Tracepoint event] | ||
576 | module:module_put [Tracepoint event] | ||
577 | module:module_request [Tracepoint event] | ||
578 | sched:sched_kthread_stop [Tracepoint event] | ||
579 | sched:sched_wakeup [Tracepoint event] | ||
580 | sched:sched_wakeup_new [Tracepoint event] | ||
581 | sched:sched_process_fork [Tracepoint event] | ||
582 | sched:sched_process_exec [Tracepoint event] | ||
583 | sched:sched_stat_runtime [Tracepoint event] | ||
584 | rcu:rcu_utilization [Tracepoint event] | ||
585 | workqueue:workqueue_queue_work [Tracepoint event] | ||
586 | workqueue:workqueue_execute_end [Tracepoint event] | ||
587 | signal:signal_generate [Tracepoint event] | ||
588 | signal:signal_deliver [Tracepoint event] | ||
589 | timer:timer_init [Tracepoint event] | ||
590 | timer:timer_start [Tracepoint event] | ||
591 | timer:hrtimer_cancel [Tracepoint event] | ||
592 | timer:itimer_state [Tracepoint event] | ||
593 | timer:itimer_expire [Tracepoint event] | ||
594 | irq:irq_handler_entry [Tracepoint event] | ||
595 | irq:irq_handler_exit [Tracepoint event] | ||
596 | irq:softirq_entry [Tracepoint event] | ||
597 | irq:softirq_exit [Tracepoint event] | ||
598 | irq:softirq_raise [Tracepoint event] | ||
599 | printk:console [Tracepoint event] | ||
600 | task:task_newtask [Tracepoint event] | ||
601 | task:task_rename [Tracepoint event] | ||
602 | syscalls:sys_enter_socketcall [Tracepoint event] | ||
603 | syscalls:sys_exit_socketcall [Tracepoint event] | ||
604 | . | ||
605 | . | ||
606 | . | ||
607 | syscalls:sys_enter_unshare [Tracepoint event] | ||
608 | syscalls:sys_exit_unshare [Tracepoint event] | ||
609 | raw_syscalls:sys_enter [Tracepoint event] | ||
610 | raw_syscalls:sys_exit [Tracepoint event] | ||
611 | </literallayout> | ||
612 | </para> | ||
613 | |||
614 | <informalexample> | ||
615 | <emphasis>Tying it Together:</emphasis> These are exactly the same set of events defined | ||
616 | by the trace event subsystem and exposed by | ||
617 | ftrace/tracecmd/kernelshark as files in | ||
618 | /sys/kernel/debug/tracing/events, by SystemTap as | ||
619 | kernel.trace("tracepoint_name") and (partially) accessed by LTTng. | ||
620 | </informalexample> | ||
621 | |||
622 | <para> | ||
623 | Only a subset of these would be of interest to us when looking at | ||
624 | this workload, so let's choose the most likely subsystems | ||
625 | (identified by the string before the colon in the Tracepoint events) | ||
626 | and do a 'perf stat' run using only those wildcarded subsystems: | ||
627 | <literallayout class='monospaced'> | ||
628 | root@crownbay:~# perf stat -e skb:* -e net:* -e napi:* -e sched:* -e workqueue:* -e irq:* -e syscalls:* wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
629 | Performance counter stats for 'wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink>': | ||
630 | |||
631 | 23323 skb:kfree_skb | ||
632 | 0 skb:consume_skb | ||
633 | 49897 skb:skb_copy_datagram_iovec | ||
634 | 6217 net:net_dev_xmit | ||
635 | 6217 net:net_dev_queue | ||
636 | 7962 net:netif_receive_skb | ||
637 | 2 net:netif_rx | ||
638 | 8340 napi:napi_poll | ||
639 | 0 sched:sched_kthread_stop | ||
640 | 0 sched:sched_kthread_stop_ret | ||
641 | 3749 sched:sched_wakeup | ||
642 | 0 sched:sched_wakeup_new | ||
643 | 0 sched:sched_switch | ||
644 | 29 sched:sched_migrate_task | ||
645 | 0 sched:sched_process_free | ||
646 | 1 sched:sched_process_exit | ||
647 | 0 sched:sched_wait_task | ||
648 | 0 sched:sched_process_wait | ||
649 | 0 sched:sched_process_fork | ||
650 | 1 sched:sched_process_exec | ||
651 | 0 sched:sched_stat_wait | ||
652 | 2106519415641 sched:sched_stat_sleep | ||
653 | 0 sched:sched_stat_iowait | ||
654 | 147453613 sched:sched_stat_blocked | ||
655 | 12903026955 sched:sched_stat_runtime | ||
656 | 0 sched:sched_pi_setprio | ||
657 | 3574 workqueue:workqueue_queue_work | ||
658 | 3574 workqueue:workqueue_activate_work | ||
659 | 0 workqueue:workqueue_execute_start | ||
660 | 0 workqueue:workqueue_execute_end | ||
661 | 16631 irq:irq_handler_entry | ||
662 | 16631 irq:irq_handler_exit | ||
663 | 28521 irq:softirq_entry | ||
664 | 28521 irq:softirq_exit | ||
665 | 28728 irq:softirq_raise | ||
666 | 1 syscalls:sys_enter_sendmmsg | ||
667 | 1 syscalls:sys_exit_sendmmsg | ||
668 | 0 syscalls:sys_enter_recvmmsg | ||
669 | 0 syscalls:sys_exit_recvmmsg | ||
670 | 14 syscalls:sys_enter_socketcall | ||
671 | 14 syscalls:sys_exit_socketcall | ||
672 | . | ||
673 | . | ||
674 | . | ||
675 | 16965 syscalls:sys_enter_read | ||
676 | 16965 syscalls:sys_exit_read | ||
677 | 12854 syscalls:sys_enter_write | ||
678 | 12854 syscalls:sys_exit_write | ||
679 | . | ||
680 | . | ||
681 | . | ||
682 | |||
683 | 58.029710972 seconds time elapsed | ||
684 | </literallayout> | ||
685 | Let's pick one of these tracepoints and tell perf to do a profile | ||
686 | using it as the sampling event: | ||
687 | <literallayout class='monospaced'> | ||
688 | root@crownbay:~# perf record -g -e sched:sched_wakeup wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
689 | </literallayout> | ||
690 | </para> | ||
691 | |||
692 | <para> | ||
693 | <imagedata fileref="figures/sched-wakeup-profile.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
694 | </para> | ||
695 | |||
696 | <para> | ||
697 | The screenshot above shows the results of running a profile using | ||
698 | sched:sched_switch tracepoint, which shows the relative costs of | ||
699 | various paths to sched_wakeup (note that sched_wakeup is the | ||
700 | name of the tracepoint - it's actually defined just inside | ||
701 | ttwu_do_wakeup(), which accounts for the function name actually | ||
702 | displayed in the profile: | ||
703 | <literallayout class='monospaced'> | ||
704 | /* | ||
705 | * Mark the task runnable and perform wakeup-preemption. | ||
706 | */ | ||
707 | static void | ||
708 | ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) | ||
709 | { | ||
710 | trace_sched_wakeup(p, true); | ||
711 | . | ||
712 | . | ||
713 | . | ||
714 | } | ||
715 | </literallayout> | ||
716 | A couple of the more interesting callchains are expanded and | ||
717 | displayed above, basically some network receive paths that | ||
718 | presumably end up waking up wget (busybox) when network data is | ||
719 | ready. | ||
720 | </para> | ||
721 | |||
722 | <para> | ||
723 | Note that because tracepoints are normally used for tracing, | ||
724 | the default sampling period for tracepoints is 1 i.e. for | ||
725 | tracepoints perf will sample on every event occurrence (this | ||
726 | can be changed using the -c option). This is in contrast to | ||
727 | hardware counters such as for example the default 'cycles' | ||
728 | hardware counter used for normal profiling, where sampling | ||
729 | periods are much higher (in the thousands) because profiling should | ||
730 | have as low an overhead as possible and sampling on every cycle | ||
731 | would be prohibitively expensive. | ||
732 | </para> | ||
733 | </section> | ||
734 | |||
735 | <section id='using-perf-to-do-basic-tracing'> | ||
736 | <title>Using perf to do Basic Tracing</title> | ||
737 | |||
738 | <para> | ||
739 | Profiling is a great tool for solving many problems or for | ||
740 | getting a high-level view of what's going on with a workload or | ||
741 | across the system. It is however by definition an approximation, | ||
742 | as suggested by the most prominent word associated with it, | ||
743 | 'sampling'. On the one hand, it allows a representative picture of | ||
744 | what's going on in the system to be cheaply taken, but on the other | ||
745 | hand, that cheapness limits its utility when that data suggests a | ||
746 | need to 'dive down' more deeply to discover what's really going | ||
747 | on. In such cases, the only way to see what's really going on is | ||
748 | to be able to look at (or summarize more intelligently) the | ||
749 | individual steps that go into the higher-level behavior exposed | ||
750 | by the coarse-grained profiling data. | ||
751 | </para> | ||
752 | |||
753 | <para> | ||
754 | As a concrete example, we can trace all the events we think might | ||
755 | be applicable to our workload: | ||
756 | <literallayout class='monospaced'> | ||
757 | root@crownbay:~# perf record -g -e skb:* -e net:* -e napi:* -e sched:sched_switch -e sched:sched_wakeup -e irq:* | ||
758 | -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write | ||
759 | wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
760 | </literallayout> | ||
761 | We can look at the raw trace output using 'perf script' with no | ||
762 | arguments: | ||
763 | <literallayout class='monospaced'> | ||
764 | root@crownbay:~# perf script | ||
765 | |||
766 | perf 1262 [000] 11624.857082: sys_exit_read: 0x0 | ||
767 | perf 1262 [000] 11624.857193: sched_wakeup: comm=migration/0 pid=6 prio=0 success=1 target_cpu=000 | ||
768 | wget 1262 [001] 11624.858021: softirq_raise: vec=1 [action=TIMER] | ||
769 | wget 1262 [001] 11624.858074: softirq_entry: vec=1 [action=TIMER] | ||
770 | wget 1262 [001] 11624.858081: softirq_exit: vec=1 [action=TIMER] | ||
771 | wget 1262 [001] 11624.858166: sys_enter_read: fd: 0x0003, buf: 0xbf82c940, count: 0x0200 | ||
772 | wget 1262 [001] 11624.858177: sys_exit_read: 0x200 | ||
773 | wget 1262 [001] 11624.858878: kfree_skb: skbaddr=0xeb248d80 protocol=0 location=0xc15a5308 | ||
774 | wget 1262 [001] 11624.858945: kfree_skb: skbaddr=0xeb248000 protocol=0 location=0xc15a5308 | ||
775 | wget 1262 [001] 11624.859020: softirq_raise: vec=1 [action=TIMER] | ||
776 | wget 1262 [001] 11624.859076: softirq_entry: vec=1 [action=TIMER] | ||
777 | wget 1262 [001] 11624.859083: softirq_exit: vec=1 [action=TIMER] | ||
778 | wget 1262 [001] 11624.859167: sys_enter_read: fd: 0x0003, buf: 0xb7720000, count: 0x0400 | ||
779 | wget 1262 [001] 11624.859192: sys_exit_read: 0x1d7 | ||
780 | wget 1262 [001] 11624.859228: sys_enter_read: fd: 0x0003, buf: 0xb7720000, count: 0x0400 | ||
781 | wget 1262 [001] 11624.859233: sys_exit_read: 0x0 | ||
782 | wget 1262 [001] 11624.859573: sys_enter_read: fd: 0x0003, buf: 0xbf82c580, count: 0x0200 | ||
783 | wget 1262 [001] 11624.859584: sys_exit_read: 0x200 | ||
784 | wget 1262 [001] 11624.859864: sys_enter_read: fd: 0x0003, buf: 0xb7720000, count: 0x0400 | ||
785 | wget 1262 [001] 11624.859888: sys_exit_read: 0x400 | ||
786 | wget 1262 [001] 11624.859935: sys_enter_read: fd: 0x0003, buf: 0xb7720000, count: 0x0400 | ||
787 | wget 1262 [001] 11624.859944: sys_exit_read: 0x400 | ||
788 | </literallayout> | ||
789 | This gives us a detailed timestamped sequence of events that | ||
790 | occurred within the workload with respect to those events. | ||
791 | </para> | ||
792 | |||
793 | <para> | ||
794 | In many ways, profiling can be viewed as a subset of tracing - | ||
795 | theoretically, if you have a set of trace events that's sufficient | ||
796 | to capture all the important aspects of a workload, you can derive | ||
797 | any of the results or views that a profiling run can. | ||
798 | </para> | ||
799 | |||
800 | <para> | ||
801 | Another aspect of traditional profiling is that while powerful in | ||
802 | many ways, it's limited by the granularity of the underlying data. | ||
803 | Profiling tools offer various ways of sorting and presenting the | ||
804 | sample data, which make it much more useful and amenable to user | ||
805 | experimentation, but in the end it can't be used in an open-ended | ||
806 | way to extract data that just isn't present as a consequence of | ||
807 | the fact that conceptually, most of it has been thrown away. | ||
808 | </para> | ||
809 | |||
810 | <para> | ||
811 | Full-blown detailed tracing data does however offer the opportunity | ||
812 | to manipulate and present the information collected during a | ||
813 | tracing run in an infinite variety of ways. | ||
814 | </para> | ||
815 | |||
816 | <para> | ||
817 | Another way to look at it is that there are only so many ways that | ||
818 | the 'primitive' counters can be used on their own to generate | ||
819 | interesting output; to get anything more complicated than simple | ||
820 | counts requires some amount of additional logic, which is typically | ||
821 | very specific to the problem at hand. For example, if we wanted to | ||
822 | make use of a 'counter' that maps to the value of the time | ||
823 | difference between when a process was scheduled to run on a | ||
824 | processor and the time it actually ran, we wouldn't expect such | ||
825 | a counter to exist on its own, but we could derive one called say | ||
826 | 'wakeup_latency' and use it to extract a useful view of that metric | ||
827 | from trace data. Likewise, we really can't figure out from standard | ||
828 | profiling tools how much data every process on the system reads and | ||
829 | writes, along with how many of those reads and writes fail | ||
830 | completely. If we have sufficient trace data, however, we could | ||
831 | with the right tools easily extract and present that information, | ||
832 | but we'd need something other than pre-canned profiling tools to | ||
833 | do that. | ||
834 | </para> | ||
835 | |||
836 | <para> | ||
837 | Luckily, there is a general-purpose way to handle such needs, | ||
838 | called 'programming languages'. Making programming languages | ||
839 | easily available to apply to such problems given the specific | ||
840 | format of data is called a 'programming language binding' for | ||
841 | that data and language. Perf supports two programming language | ||
842 | bindings, one for Python and one for Perl. | ||
843 | </para> | ||
844 | |||
845 | <informalexample> | ||
846 | <emphasis>Tying it Together:</emphasis> Language bindings for manipulating and | ||
847 | aggregating trace data are of course not a new | ||
848 | idea. One of the first projects to do this was IBM's DProbes | ||
849 | dpcc compiler, an ANSI C compiler which targeted a low-level | ||
850 | assembly language running on an in-kernel interpreter on the | ||
851 | target system. This is exactly analogous to what Sun's DTrace | ||
852 | did, except that DTrace invented its own language for the purpose. | ||
853 | Systemtap, heavily inspired by DTrace, also created its own | ||
854 | one-off language, but rather than running the product on an | ||
855 | in-kernel interpreter, created an elaborate compiler-based | ||
856 | machinery to translate its language into kernel modules written | ||
857 | in C. | ||
858 | </informalexample> | ||
859 | |||
860 | <para> | ||
861 | Now that we have the trace data in perf.data, we can use | ||
862 | 'perf script -g' to generate a skeleton script with handlers | ||
863 | for the read/write entry/exit events we recorded: | ||
864 | <literallayout class='monospaced'> | ||
865 | root@crownbay:~# perf script -g python | ||
866 | generated Python script: perf-script.py | ||
867 | </literallayout> | ||
868 | The skeleton script simply creates a python function for each | ||
869 | event type in the perf.data file. The body of each function simply | ||
870 | prints the event name along with its parameters. For example: | ||
871 | <literallayout class='monospaced'> | ||
872 | def net__netif_rx(event_name, context, common_cpu, | ||
873 | common_secs, common_nsecs, common_pid, common_comm, | ||
874 | skbaddr, len, name): | ||
875 | print_header(event_name, common_cpu, common_secs, common_nsecs, | ||
876 | common_pid, common_comm) | ||
877 | |||
878 | print "skbaddr=%u, len=%u, name=%s\n" % (skbaddr, len, name), | ||
879 | </literallayout> | ||
880 | We can run that script directly to print all of the events | ||
881 | contained in the perf.data file: | ||
882 | <literallayout class='monospaced'> | ||
883 | root@crownbay:~# perf script -s perf-script.py | ||
884 | |||
885 | in trace_begin | ||
886 | syscalls__sys_exit_read 0 11624.857082795 1262 perf nr=3, ret=0 | ||
887 | sched__sched_wakeup 0 11624.857193498 1262 perf comm=migration/0, pid=6, prio=0, success=1, target_cpu=0 | ||
888 | irq__softirq_raise 1 11624.858021635 1262 wget vec=TIMER | ||
889 | irq__softirq_entry 1 11624.858074075 1262 wget vec=TIMER | ||
890 | irq__softirq_exit 1 11624.858081389 1262 wget vec=TIMER | ||
891 | syscalls__sys_enter_read 1 11624.858166434 1262 wget nr=3, fd=3, buf=3213019456, count=512 | ||
892 | syscalls__sys_exit_read 1 11624.858177924 1262 wget nr=3, ret=512 | ||
893 | skb__kfree_skb 1 11624.858878188 1262 wget skbaddr=3945041280, location=3243922184, protocol=0 | ||
894 | skb__kfree_skb 1 11624.858945608 1262 wget skbaddr=3945037824, location=3243922184, protocol=0 | ||
895 | irq__softirq_raise 1 11624.859020942 1262 wget vec=TIMER | ||
896 | irq__softirq_entry 1 11624.859076935 1262 wget vec=TIMER | ||
897 | irq__softirq_exit 1 11624.859083469 1262 wget vec=TIMER | ||
898 | syscalls__sys_enter_read 1 11624.859167565 1262 wget nr=3, fd=3, buf=3077701632, count=1024 | ||
899 | syscalls__sys_exit_read 1 11624.859192533 1262 wget nr=3, ret=471 | ||
900 | syscalls__sys_enter_read 1 11624.859228072 1262 wget nr=3, fd=3, buf=3077701632, count=1024 | ||
901 | syscalls__sys_exit_read 1 11624.859233707 1262 wget nr=3, ret=0 | ||
902 | syscalls__sys_enter_read 1 11624.859573008 1262 wget nr=3, fd=3, buf=3213018496, count=512 | ||
903 | syscalls__sys_exit_read 1 11624.859584818 1262 wget nr=3, ret=512 | ||
904 | syscalls__sys_enter_read 1 11624.859864562 1262 wget nr=3, fd=3, buf=3077701632, count=1024 | ||
905 | syscalls__sys_exit_read 1 11624.859888770 1262 wget nr=3, ret=1024 | ||
906 | syscalls__sys_enter_read 1 11624.859935140 1262 wget nr=3, fd=3, buf=3077701632, count=1024 | ||
907 | syscalls__sys_exit_read 1 11624.859944032 1262 wget nr=3, ret=1024 | ||
908 | </literallayout> | ||
909 | That in itself isn't very useful; after all, we can accomplish | ||
910 | pretty much the same thing by simply running 'perf script' | ||
911 | without arguments in the same directory as the perf.data file. | ||
912 | </para> | ||
913 | |||
914 | <para> | ||
915 | We can however replace the print statements in the generated | ||
916 | function bodies with whatever we want, and thereby make it | ||
917 | infinitely more useful. | ||
918 | </para> | ||
919 | |||
920 | <para> | ||
921 | As a simple example, let's just replace the print statements in | ||
922 | the function bodies with a simple function that does nothing but | ||
923 | increment a per-event count. When the program is run against a | ||
924 | perf.data file, each time a particular event is encountered, | ||
925 | a tally is incremented for that event. For example: | ||
926 | <literallayout class='monospaced'> | ||
927 | def net__netif_rx(event_name, context, common_cpu, | ||
928 | common_secs, common_nsecs, common_pid, common_comm, | ||
929 | skbaddr, len, name): | ||
930 | inc_counts(event_name) | ||
931 | </literallayout> | ||
932 | Each event handler function in the generated code is modified | ||
933 | to do this. For convenience, we define a common function called | ||
934 | inc_counts() that each handler calls; inc_counts() simply tallies | ||
935 | a count for each event using the 'counts' hash, which is a | ||
936 | specialized hash function that does Perl-like autovivification, a | ||
937 | capability that's extremely useful for kinds of multi-level | ||
938 | aggregation commonly used in processing traces (see perf's | ||
939 | documentation on the Python language binding for details): | ||
940 | <literallayout class='monospaced'> | ||
941 | counts = autodict() | ||
942 | |||
943 | def inc_counts(event_name): | ||
944 | try: | ||
945 | counts[event_name] += 1 | ||
946 | except TypeError: | ||
947 | counts[event_name] = 1 | ||
948 | </literallayout> | ||
949 | Finally, at the end of the trace processing run, we want to | ||
950 | print the result of all the per-event tallies. For that, we | ||
951 | use the special 'trace_end()' function: | ||
952 | <literallayout class='monospaced'> | ||
953 | def trace_end(): | ||
954 | for event_name, count in counts.iteritems(): | ||
955 | print "%-40s %10s\n" % (event_name, count) | ||
956 | </literallayout> | ||
957 | The end result is a summary of all the events recorded in the | ||
958 | trace: | ||
959 | <literallayout class='monospaced'> | ||
960 | skb__skb_copy_datagram_iovec 13148 | ||
961 | irq__softirq_entry 4796 | ||
962 | irq__irq_handler_exit 3805 | ||
963 | irq__softirq_exit 4795 | ||
964 | syscalls__sys_enter_write 8990 | ||
965 | net__net_dev_xmit 652 | ||
966 | skb__kfree_skb 4047 | ||
967 | sched__sched_wakeup 1155 | ||
968 | irq__irq_handler_entry 3804 | ||
969 | irq__softirq_raise 4799 | ||
970 | net__net_dev_queue 652 | ||
971 | syscalls__sys_enter_read 17599 | ||
972 | net__netif_receive_skb 1743 | ||
973 | syscalls__sys_exit_read 17598 | ||
974 | net__netif_rx 2 | ||
975 | napi__napi_poll 1877 | ||
976 | syscalls__sys_exit_write 8990 | ||
977 | </literallayout> | ||
978 | Note that this is pretty much exactly the same information we get | ||
979 | from 'perf stat', which goes a little way to support the idea | ||
980 | mentioned previously that given the right kind of trace data, | ||
981 | higher-level profiling-type summaries can be derived from it. | ||
982 | </para> | ||
983 | |||
984 | <para> | ||
985 | Documentation on using the | ||
986 | <ulink url='http://linux.die.net/man/1/perf-script-python'>'perf script' python binding</ulink>. | ||
987 | </para> | ||
988 | </section> | ||
989 | |||
990 | <section id='system-wide-tracing-and-profiling'> | ||
991 | <title>System-Wide Tracing and Profiling</title> | ||
992 | |||
993 | <para> | ||
994 | The examples so far have focused on tracing a particular program or | ||
995 | workload - in other words, every profiling run has specified the | ||
996 | program to profile in the command-line e.g. 'perf record wget ...'. | ||
997 | </para> | ||
998 | |||
999 | <para> | ||
1000 | It's also possible, and more interesting in many cases, to run a | ||
1001 | system-wide profile or trace while running the workload in a | ||
1002 | separate shell. | ||
1003 | </para> | ||
1004 | |||
1005 | <para> | ||
1006 | To do system-wide profiling or tracing, you typically use | ||
1007 | the -a flag to 'perf record'. | ||
1008 | </para> | ||
1009 | |||
1010 | <para> | ||
1011 | To demonstrate this, open up one window and start the profile | ||
1012 | using the -a flag (press Ctrl-C to stop tracing): | ||
1013 | <literallayout class='monospaced'> | ||
1014 | root@crownbay:~# perf record -g -a | ||
1015 | ^C[ perf record: Woken up 6 times to write data ] | ||
1016 | [ perf record: Captured and wrote 1.400 MB perf.data (~61172 samples) ] | ||
1017 | </literallayout> | ||
1018 | In another window, run the wget test: | ||
1019 | <literallayout class='monospaced'> | ||
1020 | root@crownbay:~# wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink> | ||
1021 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
1022 | linux-2.6.19.2.tar.b 100% |*******************************| 41727k 0:00:00 ETA | ||
1023 | </literallayout> | ||
1024 | Here we see entries not only for our wget load, but for other | ||
1025 | processes running on the system as well: | ||
1026 | </para> | ||
1027 | |||
1028 | <para> | ||
1029 | <imagedata fileref="figures/perf-systemwide.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
1030 | </para> | ||
1031 | |||
1032 | <para> | ||
1033 | In the snapshot above, we can see callchains that originate in | ||
1034 | libc, and a callchain from Xorg that demonstrates that we're | ||
1035 | using a proprietary X driver in userspace (notice the presence | ||
1036 | of 'PVR' and some other unresolvable symbols in the expanded | ||
1037 | Xorg callchain). | ||
1038 | </para> | ||
1039 | |||
1040 | <para> | ||
1041 | Note also that we have both kernel and userspace entries in the | ||
1042 | above snapshot. We can also tell perf to focus on userspace but | ||
1043 | providing a modifier, in this case 'u', to the 'cycles' hardware | ||
1044 | counter when we record a profile: | ||
1045 | <literallayout class='monospaced'> | ||
1046 | root@crownbay:~# perf record -g -a -e cycles:u | ||
1047 | ^C[ perf record: Woken up 2 times to write data ] | ||
1048 | [ perf record: Captured and wrote 0.376 MB perf.data (~16443 samples) ] | ||
1049 | </literallayout> | ||
1050 | </para> | ||
1051 | |||
1052 | <para> | ||
1053 | <imagedata fileref="figures/perf-report-cycles-u.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
1054 | </para> | ||
1055 | |||
1056 | <para> | ||
1057 | Notice in the screenshot above, we see only userspace entries ([.]) | ||
1058 | </para> | ||
1059 | |||
1060 | <para> | ||
1061 | Finally, we can press 'enter' on a leaf node and select the 'Zoom | ||
1062 | into DSO' menu item to show only entries associated with a | ||
1063 | specific DSO. In the screenshot below, we've zoomed into the | ||
1064 | 'libc' DSO which shows all the entries associated with the | ||
1065 | libc-xxx.so DSO. | ||
1066 | </para> | ||
1067 | |||
1068 | <para> | ||
1069 | <imagedata fileref="figures/perf-systemwide-libc.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
1070 | </para> | ||
1071 | |||
1072 | <para> | ||
1073 | We can also use the system-wide -a switch to do system-wide | ||
1074 | tracing. Here we'll trace a couple of scheduler events: | ||
1075 | <literallayout class='monospaced'> | ||
1076 | root@crownbay:~# perf record -a -e sched:sched_switch -e sched:sched_wakeup | ||
1077 | ^C[ perf record: Woken up 38 times to write data ] | ||
1078 | [ perf record: Captured and wrote 9.780 MB perf.data (~427299 samples) ] | ||
1079 | </literallayout> | ||
1080 | We can look at the raw output using 'perf script' with no | ||
1081 | arguments: | ||
1082 | <literallayout class='monospaced'> | ||
1083 | root@crownbay:~# perf script | ||
1084 | |||
1085 | perf 1383 [001] 6171.460045: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
1086 | perf 1383 [001] 6171.460066: sched_switch: prev_comm=perf prev_pid=1383 prev_prio=120 prev_state=R+ ==> next_comm=kworker/1:1 next_pid=21 next_prio=120 | ||
1087 | kworker/1:1 21 [001] 6171.460093: sched_switch: prev_comm=kworker/1:1 prev_pid=21 prev_prio=120 prev_state=S ==> next_comm=perf next_pid=1383 next_prio=120 | ||
1088 | swapper 0 [000] 6171.468063: sched_wakeup: comm=kworker/0:3 pid=1209 prio=120 success=1 target_cpu=000 | ||
1089 | swapper 0 [000] 6171.468107: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/0:3 next_pid=1209 next_prio=120 | ||
1090 | kworker/0:3 1209 [000] 6171.468143: sched_switch: prev_comm=kworker/0:3 prev_pid=1209 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120 | ||
1091 | perf 1383 [001] 6171.470039: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
1092 | perf 1383 [001] 6171.470058: sched_switch: prev_comm=perf prev_pid=1383 prev_prio=120 prev_state=R+ ==> next_comm=kworker/1:1 next_pid=21 next_prio=120 | ||
1093 | kworker/1:1 21 [001] 6171.470082: sched_switch: prev_comm=kworker/1:1 prev_pid=21 prev_prio=120 prev_state=S ==> next_comm=perf next_pid=1383 next_prio=120 | ||
1094 | perf 1383 [001] 6171.480035: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
1095 | </literallayout> | ||
1096 | </para> | ||
1097 | |||
1098 | <section id='perf-filtering'> | ||
1099 | <title>Filtering</title> | ||
1100 | |||
1101 | <para> | ||
1102 | Notice that there are a lot of events that don't really have | ||
1103 | anything to do with what we're interested in, namely events | ||
1104 | that schedule 'perf' itself in and out or that wake perf up. | ||
1105 | We can get rid of those by using the '--filter' option - | ||
1106 | for each event we specify using -e, we can add a --filter | ||
1107 | after that to filter out trace events that contain fields | ||
1108 | with specific values: | ||
1109 | <literallayout class='monospaced'> | ||
1110 | root@crownbay:~# perf record -a -e sched:sched_switch --filter 'next_comm != perf && prev_comm != perf' -e sched:sched_wakeup --filter 'comm != perf' | ||
1111 | ^C[ perf record: Woken up 38 times to write data ] | ||
1112 | [ perf record: Captured and wrote 9.688 MB perf.data (~423279 samples) ] | ||
1113 | |||
1114 | |||
1115 | root@crownbay:~# perf script | ||
1116 | |||
1117 | swapper 0 [000] 7932.162180: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/0:3 next_pid=1209 next_prio=120 | ||
1118 | kworker/0:3 1209 [000] 7932.162236: sched_switch: prev_comm=kworker/0:3 prev_pid=1209 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120 | ||
1119 | perf 1407 [001] 7932.170048: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
1120 | perf 1407 [001] 7932.180044: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
1121 | perf 1407 [001] 7932.190038: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
1122 | perf 1407 [001] 7932.200044: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
1123 | perf 1407 [001] 7932.210044: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
1124 | perf 1407 [001] 7932.220044: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
1125 | swapper 0 [001] 7932.230111: sched_wakeup: comm=kworker/1:1 pid=21 prio=120 success=1 target_cpu=001 | ||
1126 | swapper 0 [001] 7932.230146: sched_switch: prev_comm=swapper/1 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/1:1 next_pid=21 next_prio=120 | ||
1127 | kworker/1:1 21 [001] 7932.230205: sched_switch: prev_comm=kworker/1:1 prev_pid=21 prev_prio=120 prev_state=S ==> next_comm=swapper/1 next_pid=0 next_prio=120 | ||
1128 | swapper 0 [000] 7932.326109: sched_wakeup: comm=kworker/0:3 pid=1209 prio=120 success=1 target_cpu=000 | ||
1129 | swapper 0 [000] 7932.326171: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/0:3 next_pid=1209 next_prio=120 | ||
1130 | kworker/0:3 1209 [000] 7932.326214: sched_switch: prev_comm=kworker/0:3 prev_pid=1209 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120 | ||
1131 | </literallayout> | ||
1132 | In this case, we've filtered out all events that have 'perf' | ||
1133 | in their 'comm' or 'comm_prev' or 'comm_next' fields. Notice | ||
1134 | that there are still events recorded for perf, but notice | ||
1135 | that those events don't have values of 'perf' for the filtered | ||
1136 | fields. To completely filter out anything from perf will | ||
1137 | require a bit more work, but for the purpose of demonstrating | ||
1138 | how to use filters, it's close enough. | ||
1139 | </para> | ||
1140 | |||
1141 | <informalexample> | ||
1142 | <emphasis>Tying it Together:</emphasis> These are exactly the same set of event | ||
1143 | filters defined by the trace event subsystem. See the | ||
1144 | ftrace/tracecmd/kernelshark section for more discussion about | ||
1145 | these event filters. | ||
1146 | </informalexample> | ||
1147 | |||
1148 | <informalexample> | ||
1149 | <emphasis>Tying it Together:</emphasis> These event filters are implemented by a | ||
1150 | special-purpose pseudo-interpreter in the kernel and are an | ||
1151 | integral and indispensable part of the perf design as it | ||
1152 | relates to tracing. kernel-based event filters provide a | ||
1153 | mechanism to precisely throttle the event stream that appears | ||
1154 | in user space, where it makes sense to provide bindings to real | ||
1155 | programming languages for postprocessing the event stream. | ||
1156 | This architecture allows for the intelligent and flexible | ||
1157 | partitioning of processing between the kernel and user space. | ||
1158 | Contrast this with other tools such as SystemTap, which does | ||
1159 | all of its processing in the kernel and as such requires a | ||
1160 | special project-defined language in order to accommodate that | ||
1161 | design, or LTTng, where everything is sent to userspace and | ||
1162 | as such requires a super-efficient kernel-to-userspace | ||
1163 | transport mechanism in order to function properly. While | ||
1164 | perf certainly can benefit from for instance advances in | ||
1165 | the design of the transport, it doesn't fundamentally depend | ||
1166 | on them. Basically, if you find that your perf tracing | ||
1167 | application is causing buffer I/O overruns, it probably | ||
1168 | means that you aren't taking enough advantage of the | ||
1169 | kernel filtering engine. | ||
1170 | </informalexample> | ||
1171 | </section> | ||
1172 | </section> | ||
1173 | |||
1174 | <section id='using-dynamic-tracepoints'> | ||
1175 | <title>Using Dynamic Tracepoints</title> | ||
1176 | |||
1177 | <para> | ||
1178 | perf isn't restricted to the fixed set of static tracepoints | ||
1179 | listed by 'perf list'. Users can also add their own 'dynamic' | ||
1180 | tracepoints anywhere in the kernel. For instance, suppose we | ||
1181 | want to define our own tracepoint on do_fork(). We can do that | ||
1182 | using the 'perf probe' perf subcommand: | ||
1183 | <literallayout class='monospaced'> | ||
1184 | root@crownbay:~# perf probe do_fork | ||
1185 | Added new event: | ||
1186 | probe:do_fork (on do_fork) | ||
1187 | |||
1188 | You can now use it in all perf tools, such as: | ||
1189 | |||
1190 | perf record -e probe:do_fork -aR sleep 1 | ||
1191 | </literallayout> | ||
1192 | Adding a new tracepoint via 'perf probe' results in an event | ||
1193 | with all the expected files and format in | ||
1194 | /sys/kernel/debug/tracing/events, just the same as for static | ||
1195 | tracepoints (as discussed in more detail in the trace events | ||
1196 | subsystem section: | ||
1197 | <literallayout class='monospaced'> | ||
1198 | root@crownbay:/sys/kernel/debug/tracing/events/probe/do_fork# ls -al | ||
1199 | drwxr-xr-x 2 root root 0 Oct 28 11:42 . | ||
1200 | drwxr-xr-x 3 root root 0 Oct 28 11:42 .. | ||
1201 | -rw-r--r-- 1 root root 0 Oct 28 11:42 enable | ||
1202 | -rw-r--r-- 1 root root 0 Oct 28 11:42 filter | ||
1203 | -r--r--r-- 1 root root 0 Oct 28 11:42 format | ||
1204 | -r--r--r-- 1 root root 0 Oct 28 11:42 id | ||
1205 | |||
1206 | root@crownbay:/sys/kernel/debug/tracing/events/probe/do_fork# cat format | ||
1207 | name: do_fork | ||
1208 | ID: 944 | ||
1209 | format: | ||
1210 | field:unsigned short common_type; offset:0; size:2; signed:0; | ||
1211 | field:unsigned char common_flags; offset:2; size:1; signed:0; | ||
1212 | field:unsigned char common_preempt_count; offset:3; size:1; signed:0; | ||
1213 | field:int common_pid; offset:4; size:4; signed:1; | ||
1214 | field:int common_padding; offset:8; size:4; signed:1; | ||
1215 | |||
1216 | field:unsigned long __probe_ip; offset:12; size:4; signed:0; | ||
1217 | |||
1218 | print fmt: "(%lx)", REC->__probe_ip | ||
1219 | </literallayout> | ||
1220 | We can list all dynamic tracepoints currently in existence: | ||
1221 | <literallayout class='monospaced'> | ||
1222 | root@crownbay:~# perf probe -l | ||
1223 | probe:do_fork (on do_fork) | ||
1224 | probe:schedule (on schedule) | ||
1225 | </literallayout> | ||
1226 | Let's record system-wide ('sleep 30' is a trick for recording | ||
1227 | system-wide but basically do nothing and then wake up after | ||
1228 | 30 seconds): | ||
1229 | <literallayout class='monospaced'> | ||
1230 | root@crownbay:~# perf record -g -a -e probe:do_fork sleep 30 | ||
1231 | [ perf record: Woken up 1 times to write data ] | ||
1232 | [ perf record: Captured and wrote 0.087 MB perf.data (~3812 samples) ] | ||
1233 | </literallayout> | ||
1234 | Using 'perf script' we can see each do_fork event that fired: | ||
1235 | <literallayout class='monospaced'> | ||
1236 | root@crownbay:~# perf script | ||
1237 | |||
1238 | # ======== | ||
1239 | # captured on: Sun Oct 28 11:55:18 2012 | ||
1240 | # hostname : crownbay | ||
1241 | # os release : 3.4.11-yocto-standard | ||
1242 | # perf version : 3.4.11 | ||
1243 | # arch : i686 | ||
1244 | # nrcpus online : 2 | ||
1245 | # nrcpus avail : 2 | ||
1246 | # cpudesc : Intel(R) Atom(TM) CPU E660 @ 1.30GHz | ||
1247 | # cpuid : GenuineIntel,6,38,1 | ||
1248 | # total memory : 1017184 kB | ||
1249 | # cmdline : /usr/bin/perf record -g -a -e probe:do_fork sleep 30 | ||
1250 | # event : name = probe:do_fork, type = 2, config = 0x3b0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern | ||
1251 | = 0, id = { 5, 6 } | ||
1252 | # HEADER_CPU_TOPOLOGY info available, use -I to display | ||
1253 | # ======== | ||
1254 | # | ||
1255 | matchbox-deskto 1197 [001] 34211.378318: do_fork: (c1028460) | ||
1256 | matchbox-deskto 1295 [001] 34211.380388: do_fork: (c1028460) | ||
1257 | pcmanfm 1296 [000] 34211.632350: do_fork: (c1028460) | ||
1258 | pcmanfm 1296 [000] 34211.639917: do_fork: (c1028460) | ||
1259 | matchbox-deskto 1197 [001] 34217.541603: do_fork: (c1028460) | ||
1260 | matchbox-deskto 1299 [001] 34217.543584: do_fork: (c1028460) | ||
1261 | gthumb 1300 [001] 34217.697451: do_fork: (c1028460) | ||
1262 | gthumb 1300 [001] 34219.085734: do_fork: (c1028460) | ||
1263 | gthumb 1300 [000] 34219.121351: do_fork: (c1028460) | ||
1264 | gthumb 1300 [001] 34219.264551: do_fork: (c1028460) | ||
1265 | pcmanfm 1296 [000] 34219.590380: do_fork: (c1028460) | ||
1266 | matchbox-deskto 1197 [001] 34224.955965: do_fork: (c1028460) | ||
1267 | matchbox-deskto 1306 [001] 34224.957972: do_fork: (c1028460) | ||
1268 | matchbox-termin 1307 [000] 34225.038214: do_fork: (c1028460) | ||
1269 | matchbox-termin 1307 [001] 34225.044218: do_fork: (c1028460) | ||
1270 | matchbox-termin 1307 [000] 34225.046442: do_fork: (c1028460) | ||
1271 | matchbox-deskto 1197 [001] 34237.112138: do_fork: (c1028460) | ||
1272 | matchbox-deskto 1311 [001] 34237.114106: do_fork: (c1028460) | ||
1273 | gaku 1312 [000] 34237.202388: do_fork: (c1028460) | ||
1274 | </literallayout> | ||
1275 | And using 'perf report' on the same file, we can see the | ||
1276 | callgraphs from starting a few programs during those 30 seconds: | ||
1277 | </para> | ||
1278 | |||
1279 | <para> | ||
1280 | <imagedata fileref="figures/perf-probe-do_fork-profile.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
1281 | </para> | ||
1282 | |||
1283 | <informalexample> | ||
1284 | <emphasis>Tying it Together:</emphasis> The trace events subsystem accommodate static | ||
1285 | and dynamic tracepoints in exactly the same way - there's no | ||
1286 | difference as far as the infrastructure is concerned. See the | ||
1287 | ftrace section for more details on the trace event subsystem. | ||
1288 | </informalexample> | ||
1289 | |||
1290 | <informalexample> | ||
1291 | <emphasis>Tying it Together:</emphasis> Dynamic tracepoints are implemented under the | ||
1292 | covers by kprobes and uprobes. kprobes and uprobes are also used | ||
1293 | by and in fact are the main focus of SystemTap. | ||
1294 | </informalexample> | ||
1295 | </section> | ||
1296 | </section> | ||
1297 | |||
1298 | <section id='perf-documentation'> | ||
1299 | <title>Documentation</title> | ||
1300 | |||
1301 | <para> | ||
1302 | Online versions of the man pages for the commands discussed in this | ||
1303 | section can be found here: | ||
1304 | <itemizedlist> | ||
1305 | <listitem><para>The <ulink url='http://linux.die.net/man/1/perf-stat'>'perf stat' manpage</ulink>. | ||
1306 | </para></listitem> | ||
1307 | <listitem><para>The <ulink url='http://linux.die.net/man/1/perf-record'>'perf record' manpage</ulink>. | ||
1308 | </para></listitem> | ||
1309 | <listitem><para>The <ulink url='http://linux.die.net/man/1/perf-report'>'perf report' manpage</ulink>. | ||
1310 | </para></listitem> | ||
1311 | <listitem><para>The <ulink url='http://linux.die.net/man/1/perf-probe'>'perf probe' manpage</ulink>. | ||
1312 | </para></listitem> | ||
1313 | <listitem><para>The <ulink url='http://linux.die.net/man/1/perf-script'>'perf script' manpage</ulink>. | ||
1314 | </para></listitem> | ||
1315 | <listitem><para>Documentation on using the | ||
1316 | <ulink url='http://linux.die.net/man/1/perf-script-python'>'perf script' python binding</ulink>. | ||
1317 | </para></listitem> | ||
1318 | <listitem><para>The top-level | ||
1319 | <ulink url='http://linux.die.net/man/1/perf'>perf(1) manpage</ulink>. | ||
1320 | </para></listitem> | ||
1321 | </itemizedlist> | ||
1322 | </para> | ||
1323 | |||
1324 | <para> | ||
1325 | Normally, you should be able to invoke the man pages via perf | ||
1326 | itself e.g. 'perf help' or 'perf help record'. | ||
1327 | </para> | ||
1328 | |||
1329 | <para> | ||
1330 | However, by default Yocto doesn't install man pages, but perf | ||
1331 | invokes the man pages for most help functionality. This is a bug | ||
1332 | and is being addressed by a Yocto bug: | ||
1333 | <ulink url='https://bugzilla.yoctoproject.org/show_bug.cgi?id=3388'>Bug 3388 - perf: enable man pages for basic 'help' functionality</ulink>. | ||
1334 | </para> | ||
1335 | |||
1336 | <para> | ||
1337 | The man pages in text form, along with some other files, such as | ||
1338 | a set of examples, can be found in the 'perf' directory of the | ||
1339 | kernel tree: | ||
1340 | <literallayout class='monospaced'> | ||
1341 | tools/perf/Documentation | ||
1342 | </literallayout> | ||
1343 | There's also a nice perf tutorial on the perf wiki that goes | ||
1344 | into more detail than we do here in certain areas: | ||
1345 | <ulink url='https://perf.wiki.kernel.org/index.php/Tutorial'>Perf Tutorial</ulink> | ||
1346 | </para> | ||
1347 | </section> | ||
1348 | </section> | ||
1349 | |||
1350 | <section id='profile-manual-ftrace'> | ||
1351 | <title>ftrace</title> | ||
1352 | |||
1353 | <para> | ||
1354 | 'ftrace' literally refers to the 'ftrace function tracer' but in | ||
1355 | reality this encompasses a number of related tracers along with | ||
1356 | the infrastructure that they all make use of. | ||
1357 | </para> | ||
1358 | |||
1359 | <section id='ftrace-setup'> | ||
1360 | <title>Setup</title> | ||
1361 | |||
1362 | <para> | ||
1363 | For this section, we'll assume you've already performed the basic | ||
1364 | setup outlined in the General Setup section. | ||
1365 | </para> | ||
1366 | |||
1367 | <para> | ||
1368 | ftrace, trace-cmd, and kernelshark run on the target system, | ||
1369 | and are ready to go out-of-the-box - no additional setup is | ||
1370 | necessary. For the rest of this section we assume you've ssh'ed | ||
1371 | to the host and will be running ftrace on the target. kernelshark | ||
1372 | is a GUI application and if you use the '-X' option to ssh you | ||
1373 | can have the kernelshark GUI run on the target but display | ||
1374 | remotely on the host if you want. | ||
1375 | </para> | ||
1376 | </section> | ||
1377 | |||
1378 | <section id='basic-ftrace-usage'> | ||
1379 | <title>Basic ftrace usage</title> | ||
1380 | |||
1381 | <para> | ||
1382 | 'ftrace' essentially refers to everything included in | ||
1383 | the /tracing directory of the mounted debugfs filesystem | ||
1384 | (Yocto follows the standard convention and mounts it | ||
1385 | at /sys/kernel/debug). Here's a listing of all the files | ||
1386 | found in /sys/kernel/debug/tracing on a Yocto system: | ||
1387 | <literallayout class='monospaced'> | ||
1388 | root@sugarbay:/sys/kernel/debug/tracing# ls | ||
1389 | README kprobe_events trace | ||
1390 | available_events kprobe_profile trace_clock | ||
1391 | available_filter_functions options trace_marker | ||
1392 | available_tracers per_cpu trace_options | ||
1393 | buffer_size_kb printk_formats trace_pipe | ||
1394 | buffer_total_size_kb saved_cmdlines tracing_cpumask | ||
1395 | current_tracer set_event tracing_enabled | ||
1396 | dyn_ftrace_total_info set_ftrace_filter tracing_on | ||
1397 | enabled_functions set_ftrace_notrace tracing_thresh | ||
1398 | events set_ftrace_pid | ||
1399 | free_buffer set_graph_function | ||
1400 | </literallayout> | ||
1401 | The files listed above are used for various purposes - | ||
1402 | some relate directly to the tracers themselves, others are | ||
1403 | used to set tracing options, and yet others actually contain | ||
1404 | the tracing output when a tracer is in effect. Some of the | ||
1405 | functions can be guessed from their names, others need | ||
1406 | explanation; in any case, we'll cover some of the files we | ||
1407 | see here below but for an explanation of the others, please | ||
1408 | see the ftrace documentation. | ||
1409 | </para> | ||
1410 | |||
1411 | <para> | ||
1412 | We'll start by looking at some of the available built-in | ||
1413 | tracers. | ||
1414 | </para> | ||
1415 | |||
1416 | <para> | ||
1417 | cat'ing the 'available_tracers' file lists the set of | ||
1418 | available tracers: | ||
1419 | <literallayout class='monospaced'> | ||
1420 | root@sugarbay:/sys/kernel/debug/tracing# cat available_tracers | ||
1421 | blk function_graph function nop | ||
1422 | </literallayout> | ||
1423 | The 'current_tracer' file contains the tracer currently in | ||
1424 | effect: | ||
1425 | <literallayout class='monospaced'> | ||
1426 | root@sugarbay:/sys/kernel/debug/tracing# cat current_tracer | ||
1427 | nop | ||
1428 | </literallayout> | ||
1429 | The above listing of current_tracer shows that | ||
1430 | the 'nop' tracer is in effect, which is just another | ||
1431 | way of saying that there's actually no tracer | ||
1432 | currently in effect. | ||
1433 | </para> | ||
1434 | |||
1435 | <para> | ||
1436 | echo'ing one of the available_tracers into current_tracer | ||
1437 | makes the specified tracer the current tracer: | ||
1438 | <literallayout class='monospaced'> | ||
1439 | root@sugarbay:/sys/kernel/debug/tracing# echo function > current_tracer | ||
1440 | root@sugarbay:/sys/kernel/debug/tracing# cat current_tracer | ||
1441 | function | ||
1442 | </literallayout> | ||
1443 | The above sets the current tracer to be the | ||
1444 | 'function tracer'. This tracer traces every function | ||
1445 | call in the kernel and makes it available as the | ||
1446 | contents of the 'trace' file. Reading the 'trace' file | ||
1447 | lists the currently buffered function calls that have been | ||
1448 | traced by the function tracer: | ||
1449 | <literallayout class='monospaced'> | ||
1450 | root@sugarbay:/sys/kernel/debug/tracing# cat trace | less | ||
1451 | |||
1452 | # tracer: function | ||
1453 | # | ||
1454 | # entries-in-buffer/entries-written: 310629/766471 #P:8 | ||
1455 | # | ||
1456 | # _-----=> irqs-off | ||
1457 | # / _----=> need-resched | ||
1458 | # | / _---=> hardirq/softirq | ||
1459 | # || / _--=> preempt-depth | ||
1460 | # ||| / delay | ||
1461 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
1462 | # | | | |||| | | | ||
1463 | <idle>-0 [004] d..1 470.867169: ktime_get_real <-intel_idle | ||
1464 | <idle>-0 [004] d..1 470.867170: getnstimeofday <-ktime_get_real | ||
1465 | <idle>-0 [004] d..1 470.867171: ns_to_timeval <-intel_idle | ||
1466 | <idle>-0 [004] d..1 470.867171: ns_to_timespec <-ns_to_timeval | ||
1467 | <idle>-0 [004] d..1 470.867172: smp_apic_timer_interrupt <-apic_timer_interrupt | ||
1468 | <idle>-0 [004] d..1 470.867172: native_apic_mem_write <-smp_apic_timer_interrupt | ||
1469 | <idle>-0 [004] d..1 470.867172: irq_enter <-smp_apic_timer_interrupt | ||
1470 | <idle>-0 [004] d..1 470.867172: rcu_irq_enter <-irq_enter | ||
1471 | <idle>-0 [004] d..1 470.867173: rcu_idle_exit_common.isra.33 <-rcu_irq_enter | ||
1472 | <idle>-0 [004] d..1 470.867173: local_bh_disable <-irq_enter | ||
1473 | <idle>-0 [004] d..1 470.867173: add_preempt_count <-local_bh_disable | ||
1474 | <idle>-0 [004] d.s1 470.867174: tick_check_idle <-irq_enter | ||
1475 | <idle>-0 [004] d.s1 470.867174: tick_check_oneshot_broadcast <-tick_check_idle | ||
1476 | <idle>-0 [004] d.s1 470.867174: ktime_get <-tick_check_idle | ||
1477 | <idle>-0 [004] d.s1 470.867174: tick_nohz_stop_idle <-tick_check_idle | ||
1478 | <idle>-0 [004] d.s1 470.867175: update_ts_time_stats <-tick_nohz_stop_idle | ||
1479 | <idle>-0 [004] d.s1 470.867175: nr_iowait_cpu <-update_ts_time_stats | ||
1480 | <idle>-0 [004] d.s1 470.867175: tick_do_update_jiffies64 <-tick_check_idle | ||
1481 | <idle>-0 [004] d.s1 470.867175: _raw_spin_lock <-tick_do_update_jiffies64 | ||
1482 | <idle>-0 [004] d.s1 470.867176: add_preempt_count <-_raw_spin_lock | ||
1483 | <idle>-0 [004] d.s2 470.867176: do_timer <-tick_do_update_jiffies64 | ||
1484 | <idle>-0 [004] d.s2 470.867176: _raw_spin_lock <-do_timer | ||
1485 | <idle>-0 [004] d.s2 470.867176: add_preempt_count <-_raw_spin_lock | ||
1486 | <idle>-0 [004] d.s3 470.867177: ntp_tick_length <-do_timer | ||
1487 | <idle>-0 [004] d.s3 470.867177: _raw_spin_lock_irqsave <-ntp_tick_length | ||
1488 | . | ||
1489 | . | ||
1490 | . | ||
1491 | </literallayout> | ||
1492 | Each line in the trace above shows what was happening in | ||
1493 | the kernel on a given cpu, to the level of detail of | ||
1494 | function calls. Each entry shows the function called, | ||
1495 | followed by its caller (after the arrow). | ||
1496 | </para> | ||
1497 | |||
1498 | <para> | ||
1499 | The function tracer gives you an extremely detailed idea | ||
1500 | of what the kernel was doing at the point in time the trace | ||
1501 | was taken, and is a great way to learn about how the kernel | ||
1502 | code works in a dynamic sense. | ||
1503 | </para> | ||
1504 | |||
1505 | <informalexample> | ||
1506 | <emphasis>Tying it Together:</emphasis> The ftrace function tracer is also | ||
1507 | available from within perf, as the ftrace:function tracepoint. | ||
1508 | </informalexample> | ||
1509 | |||
1510 | <para> | ||
1511 | It is a little more difficult to follow the call chains than | ||
1512 | it needs to be - luckily there's a variant of the function | ||
1513 | tracer that displays the callchains explicitly, called the | ||
1514 | 'function_graph' tracer: | ||
1515 | <literallayout class='monospaced'> | ||
1516 | root@sugarbay:/sys/kernel/debug/tracing# echo function_graph > current_tracer | ||
1517 | root@sugarbay:/sys/kernel/debug/tracing# cat trace | less | ||
1518 | |||
1519 | tracer: function_graph | ||
1520 | |||
1521 | CPU DURATION FUNCTION CALLS | ||
1522 | | | | | | | | | ||
1523 | 7) 0.046 us | pick_next_task_fair(); | ||
1524 | 7) 0.043 us | pick_next_task_stop(); | ||
1525 | 7) 0.042 us | pick_next_task_rt(); | ||
1526 | 7) 0.032 us | pick_next_task_fair(); | ||
1527 | 7) 0.030 us | pick_next_task_idle(); | ||
1528 | 7) | _raw_spin_unlock_irq() { | ||
1529 | 7) 0.033 us | sub_preempt_count(); | ||
1530 | 7) 0.258 us | } | ||
1531 | 7) 0.032 us | sub_preempt_count(); | ||
1532 | 7) + 13.341 us | } /* __schedule */ | ||
1533 | 7) 0.095 us | } /* sub_preempt_count */ | ||
1534 | 7) | schedule() { | ||
1535 | 7) | __schedule() { | ||
1536 | 7) 0.060 us | add_preempt_count(); | ||
1537 | 7) 0.044 us | rcu_note_context_switch(); | ||
1538 | 7) | _raw_spin_lock_irq() { | ||
1539 | 7) 0.033 us | add_preempt_count(); | ||
1540 | 7) 0.247 us | } | ||
1541 | 7) | idle_balance() { | ||
1542 | 7) | _raw_spin_unlock() { | ||
1543 | 7) 0.031 us | sub_preempt_count(); | ||
1544 | 7) 0.246 us | } | ||
1545 | 7) | update_shares() { | ||
1546 | 7) 0.030 us | __rcu_read_lock(); | ||
1547 | 7) 0.029 us | __rcu_read_unlock(); | ||
1548 | 7) 0.484 us | } | ||
1549 | 7) 0.030 us | __rcu_read_lock(); | ||
1550 | 7) | load_balance() { | ||
1551 | 7) | find_busiest_group() { | ||
1552 | 7) 0.031 us | idle_cpu(); | ||
1553 | 7) 0.029 us | idle_cpu(); | ||
1554 | 7) 0.035 us | idle_cpu(); | ||
1555 | 7) 0.906 us | } | ||
1556 | 7) 1.141 us | } | ||
1557 | 7) 0.022 us | msecs_to_jiffies(); | ||
1558 | 7) | load_balance() { | ||
1559 | 7) | find_busiest_group() { | ||
1560 | 7) 0.031 us | idle_cpu(); | ||
1561 | . | ||
1562 | . | ||
1563 | . | ||
1564 | 4) 0.062 us | msecs_to_jiffies(); | ||
1565 | 4) 0.062 us | __rcu_read_unlock(); | ||
1566 | 4) | _raw_spin_lock() { | ||
1567 | 4) 0.073 us | add_preempt_count(); | ||
1568 | 4) 0.562 us | } | ||
1569 | 4) + 17.452 us | } | ||
1570 | 4) 0.108 us | put_prev_task_fair(); | ||
1571 | 4) 0.102 us | pick_next_task_fair(); | ||
1572 | 4) 0.084 us | pick_next_task_stop(); | ||
1573 | 4) 0.075 us | pick_next_task_rt(); | ||
1574 | 4) 0.062 us | pick_next_task_fair(); | ||
1575 | 4) 0.066 us | pick_next_task_idle(); | ||
1576 | ------------------------------------------ | ||
1577 | 4) kworker-74 => <idle>-0 | ||
1578 | ------------------------------------------ | ||
1579 | |||
1580 | 4) | finish_task_switch() { | ||
1581 | 4) | _raw_spin_unlock_irq() { | ||
1582 | 4) 0.100 us | sub_preempt_count(); | ||
1583 | 4) 0.582 us | } | ||
1584 | 4) 1.105 us | } | ||
1585 | 4) 0.088 us | sub_preempt_count(); | ||
1586 | 4) ! 100.066 us | } | ||
1587 | . | ||
1588 | . | ||
1589 | . | ||
1590 | 3) | sys_ioctl() { | ||
1591 | 3) 0.083 us | fget_light(); | ||
1592 | 3) | security_file_ioctl() { | ||
1593 | 3) 0.066 us | cap_file_ioctl(); | ||
1594 | 3) 0.562 us | } | ||
1595 | 3) | do_vfs_ioctl() { | ||
1596 | 3) | drm_ioctl() { | ||
1597 | 3) 0.075 us | drm_ut_debug_printk(); | ||
1598 | 3) | i915_gem_pwrite_ioctl() { | ||
1599 | 3) | i915_mutex_lock_interruptible() { | ||
1600 | 3) 0.070 us | mutex_lock_interruptible(); | ||
1601 | 3) 0.570 us | } | ||
1602 | 3) | drm_gem_object_lookup() { | ||
1603 | 3) | _raw_spin_lock() { | ||
1604 | 3) 0.080 us | add_preempt_count(); | ||
1605 | 3) 0.620 us | } | ||
1606 | 3) | _raw_spin_unlock() { | ||
1607 | 3) 0.085 us | sub_preempt_count(); | ||
1608 | 3) 0.562 us | } | ||
1609 | 3) 2.149 us | } | ||
1610 | 3) 0.133 us | i915_gem_object_pin(); | ||
1611 | 3) | i915_gem_object_set_to_gtt_domain() { | ||
1612 | 3) 0.065 us | i915_gem_object_flush_gpu_write_domain(); | ||
1613 | 3) 0.065 us | i915_gem_object_wait_rendering(); | ||
1614 | 3) 0.062 us | i915_gem_object_flush_cpu_write_domain(); | ||
1615 | 3) 1.612 us | } | ||
1616 | 3) | i915_gem_object_put_fence() { | ||
1617 | 3) 0.097 us | i915_gem_object_flush_fence.constprop.36(); | ||
1618 | 3) 0.645 us | } | ||
1619 | 3) 0.070 us | add_preempt_count(); | ||
1620 | 3) 0.070 us | sub_preempt_count(); | ||
1621 | 3) 0.073 us | i915_gem_object_unpin(); | ||
1622 | 3) 0.068 us | mutex_unlock(); | ||
1623 | 3) 9.924 us | } | ||
1624 | 3) + 11.236 us | } | ||
1625 | 3) + 11.770 us | } | ||
1626 | 3) + 13.784 us | } | ||
1627 | 3) | sys_ioctl() { | ||
1628 | </literallayout> | ||
1629 | As you can see, the function_graph display is much easier to | ||
1630 | follow. Also note that in addition to the function calls and | ||
1631 | associated braces, other events such as scheduler events | ||
1632 | are displayed in context. In fact, you can freely include | ||
1633 | any tracepoint available in the trace events subsystem described | ||
1634 | in the next section by simply enabling those events, and they'll | ||
1635 | appear in context in the function graph display. Quite a | ||
1636 | powerful tool for understanding kernel dynamics. | ||
1637 | </para> | ||
1638 | |||
1639 | <para> | ||
1640 | Also notice that there are various annotations on the left | ||
1641 | hand side of the display. For example if the total time it | ||
1642 | took for a given function to execute is above a certain | ||
1643 | threshold, an exclamation point or plus sign appears on the | ||
1644 | left hand side. Please see the ftrace documentation for | ||
1645 | details on all these fields. | ||
1646 | </para> | ||
1647 | </section> | ||
1648 | |||
1649 | <section id='the-trace-events-subsystem'> | ||
1650 | <title>The 'trace events' Subsystem</title> | ||
1651 | |||
1652 | <para> | ||
1653 | One especially important directory contained within | ||
1654 | the /sys/kernel/debug/tracing directory is the 'events' | ||
1655 | subdirectory, which contains representations of every | ||
1656 | tracepoint in the system. Listing out the contents of | ||
1657 | the 'events' subdirectory, we see mainly another set of | ||
1658 | subdirectories: | ||
1659 | <literallayout class='monospaced'> | ||
1660 | root@sugarbay:/sys/kernel/debug/tracing# cd events | ||
1661 | root@sugarbay:/sys/kernel/debug/tracing/events# ls -al | ||
1662 | drwxr-xr-x 38 root root 0 Nov 14 23:19 . | ||
1663 | drwxr-xr-x 5 root root 0 Nov 14 23:19 .. | ||
1664 | drwxr-xr-x 19 root root 0 Nov 14 23:19 block | ||
1665 | drwxr-xr-x 32 root root 0 Nov 14 23:19 btrfs | ||
1666 | drwxr-xr-x 5 root root 0 Nov 14 23:19 drm | ||
1667 | -rw-r--r-- 1 root root 0 Nov 14 23:19 enable | ||
1668 | drwxr-xr-x 40 root root 0 Nov 14 23:19 ext3 | ||
1669 | drwxr-xr-x 79 root root 0 Nov 14 23:19 ext4 | ||
1670 | drwxr-xr-x 14 root root 0 Nov 14 23:19 ftrace | ||
1671 | drwxr-xr-x 8 root root 0 Nov 14 23:19 hda | ||
1672 | -r--r--r-- 1 root root 0 Nov 14 23:19 header_event | ||
1673 | -r--r--r-- 1 root root 0 Nov 14 23:19 header_page | ||
1674 | drwxr-xr-x 25 root root 0 Nov 14 23:19 i915 | ||
1675 | drwxr-xr-x 7 root root 0 Nov 14 23:19 irq | ||
1676 | drwxr-xr-x 12 root root 0 Nov 14 23:19 jbd | ||
1677 | drwxr-xr-x 14 root root 0 Nov 14 23:19 jbd2 | ||
1678 | drwxr-xr-x 14 root root 0 Nov 14 23:19 kmem | ||
1679 | drwxr-xr-x 7 root root 0 Nov 14 23:19 module | ||
1680 | drwxr-xr-x 3 root root 0 Nov 14 23:19 napi | ||
1681 | drwxr-xr-x 6 root root 0 Nov 14 23:19 net | ||
1682 | drwxr-xr-x 3 root root 0 Nov 14 23:19 oom | ||
1683 | drwxr-xr-x 12 root root 0 Nov 14 23:19 power | ||
1684 | drwxr-xr-x 3 root root 0 Nov 14 23:19 printk | ||
1685 | drwxr-xr-x 8 root root 0 Nov 14 23:19 random | ||
1686 | drwxr-xr-x 4 root root 0 Nov 14 23:19 raw_syscalls | ||
1687 | drwxr-xr-x 3 root root 0 Nov 14 23:19 rcu | ||
1688 | drwxr-xr-x 6 root root 0 Nov 14 23:19 rpm | ||
1689 | drwxr-xr-x 20 root root 0 Nov 14 23:19 sched | ||
1690 | drwxr-xr-x 7 root root 0 Nov 14 23:19 scsi | ||
1691 | drwxr-xr-x 4 root root 0 Nov 14 23:19 signal | ||
1692 | drwxr-xr-x 5 root root 0 Nov 14 23:19 skb | ||
1693 | drwxr-xr-x 4 root root 0 Nov 14 23:19 sock | ||
1694 | drwxr-xr-x 10 root root 0 Nov 14 23:19 sunrpc | ||
1695 | drwxr-xr-x 538 root root 0 Nov 14 23:19 syscalls | ||
1696 | drwxr-xr-x 4 root root 0 Nov 14 23:19 task | ||
1697 | drwxr-xr-x 14 root root 0 Nov 14 23:19 timer | ||
1698 | drwxr-xr-x 3 root root 0 Nov 14 23:19 udp | ||
1699 | drwxr-xr-x 21 root root 0 Nov 14 23:19 vmscan | ||
1700 | drwxr-xr-x 3 root root 0 Nov 14 23:19 vsyscall | ||
1701 | drwxr-xr-x 6 root root 0 Nov 14 23:19 workqueue | ||
1702 | drwxr-xr-x 26 root root 0 Nov 14 23:19 writeback | ||
1703 | </literallayout> | ||
1704 | Each one of these subdirectories corresponds to a | ||
1705 | 'subsystem' and contains yet again more subdirectories, | ||
1706 | each one of those finally corresponding to a tracepoint. | ||
1707 | For example, here are the contents of the 'kmem' subsystem: | ||
1708 | <literallayout class='monospaced'> | ||
1709 | root@sugarbay:/sys/kernel/debug/tracing/events# cd kmem | ||
1710 | root@sugarbay:/sys/kernel/debug/tracing/events/kmem# ls -al | ||
1711 | drwxr-xr-x 14 root root 0 Nov 14 23:19 . | ||
1712 | drwxr-xr-x 38 root root 0 Nov 14 23:19 .. | ||
1713 | -rw-r--r-- 1 root root 0 Nov 14 23:19 enable | ||
1714 | -rw-r--r-- 1 root root 0 Nov 14 23:19 filter | ||
1715 | drwxr-xr-x 2 root root 0 Nov 14 23:19 kfree | ||
1716 | drwxr-xr-x 2 root root 0 Nov 14 23:19 kmalloc | ||
1717 | drwxr-xr-x 2 root root 0 Nov 14 23:19 kmalloc_node | ||
1718 | drwxr-xr-x 2 root root 0 Nov 14 23:19 kmem_cache_alloc | ||
1719 | drwxr-xr-x 2 root root 0 Nov 14 23:19 kmem_cache_alloc_node | ||
1720 | drwxr-xr-x 2 root root 0 Nov 14 23:19 kmem_cache_free | ||
1721 | drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_alloc | ||
1722 | drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_alloc_extfrag | ||
1723 | drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_alloc_zone_locked | ||
1724 | drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_free | ||
1725 | drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_free_batched | ||
1726 | drwxr-xr-x 2 root root 0 Nov 14 23:19 mm_page_pcpu_drain | ||
1727 | </literallayout> | ||
1728 | Let's see what's inside the subdirectory for a specific | ||
1729 | tracepoint, in this case the one for kmalloc: | ||
1730 | <literallayout class='monospaced'> | ||
1731 | root@sugarbay:/sys/kernel/debug/tracing/events/kmem# cd kmalloc | ||
1732 | root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# ls -al | ||
1733 | drwxr-xr-x 2 root root 0 Nov 14 23:19 . | ||
1734 | drwxr-xr-x 14 root root 0 Nov 14 23:19 .. | ||
1735 | -rw-r--r-- 1 root root 0 Nov 14 23:19 enable | ||
1736 | -rw-r--r-- 1 root root 0 Nov 14 23:19 filter | ||
1737 | -r--r--r-- 1 root root 0 Nov 14 23:19 format | ||
1738 | -r--r--r-- 1 root root 0 Nov 14 23:19 id | ||
1739 | </literallayout> | ||
1740 | The 'format' file for the tracepoint describes the event | ||
1741 | in memory, which is used by the various tracing tools | ||
1742 | that now make use of these tracepoint to parse the event | ||
1743 | and make sense of it, along with a 'print fmt' field that | ||
1744 | allows tools like ftrace to display the event as text. | ||
1745 | Here's what the format of the kmalloc event looks like: | ||
1746 | <literallayout class='monospaced'> | ||
1747 | root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# cat format | ||
1748 | name: kmalloc | ||
1749 | ID: 313 | ||
1750 | format: | ||
1751 | field:unsigned short common_type; offset:0; size:2; signed:0; | ||
1752 | field:unsigned char common_flags; offset:2; size:1; signed:0; | ||
1753 | field:unsigned char common_preempt_count; offset:3; size:1; signed:0; | ||
1754 | field:int common_pid; offset:4; size:4; signed:1; | ||
1755 | field:int common_padding; offset:8; size:4; signed:1; | ||
1756 | |||
1757 | field:unsigned long call_site; offset:16; size:8; signed:0; | ||
1758 | field:const void * ptr; offset:24; size:8; signed:0; | ||
1759 | field:size_t bytes_req; offset:32; size:8; signed:0; | ||
1760 | field:size_t bytes_alloc; offset:40; size:8; signed:0; | ||
1761 | field:gfp_t gfp_flags; offset:48; size:4; signed:0; | ||
1762 | |||
1763 | print fmt: "call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", REC->call_site, REC->ptr, REC->bytes_req, REC->bytes_alloc, | ||
1764 | (REC->gfp_flags) ? __print_flags(REC->gfp_flags, "|", {(unsigned long)(((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( | ||
1765 | gfp_t)0x20000u) | (( gfp_t)0x02u) | (( gfp_t)0x08u)) | (( gfp_t)0x4000u) | (( gfp_t)0x10000u) | (( gfp_t)0x1000u) | (( gfp_t)0x200u) | (( | ||
1766 | gfp_t)0x400000u)), "GFP_TRANSHUGE"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( gfp_t)0x20000u) | (( | ||
1767 | gfp_t)0x02u) | (( gfp_t)0x08u)), "GFP_HIGHUSER_MOVABLE"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( | ||
1768 | gfp_t)0x20000u) | (( gfp_t)0x02u)), "GFP_HIGHUSER"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( | ||
1769 | gfp_t)0x20000u)), "GFP_USER"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( gfp_t)0x80000u)), GFP_TEMPORARY"}, | ||
1770 | {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u) | (( gfp_t)0x80u)), "GFP_KERNEL"}, {(unsigned long)((( gfp_t)0x10u) | (( gfp_t)0x40u)), | ||
1771 | "GFP_NOFS"}, {(unsigned long)((( gfp_t)0x20u)), "GFP_ATOMIC"}, {(unsigned long)((( gfp_t)0x10u)), "GFP_NOIO"}, {(unsigned long)(( | ||
1772 | gfp_t)0x20u), "GFP_HIGH"}, {(unsigned long)(( gfp_t)0x10u), "GFP_WAIT"}, {(unsigned long)(( gfp_t)0x40u), "GFP_IO"}, {(unsigned long)(( | ||
1773 | gfp_t)0x100u), "GFP_COLD"}, {(unsigned long)(( gfp_t)0x200u), "GFP_NOWARN"}, {(unsigned long)(( gfp_t)0x400u), "GFP_REPEAT"}, {(unsigned | ||
1774 | long)(( gfp_t)0x800u), "GFP_NOFAIL"}, {(unsigned long)(( gfp_t)0x1000u), "GFP_NORETRY"}, {(unsigned long)(( gfp_t)0x4000u), "GFP_COMP"}, | ||
1775 | {(unsigned long)(( gfp_t)0x8000u), "GFP_ZERO"}, {(unsigned long)(( gfp_t)0x10000u), "GFP_NOMEMALLOC"}, {(unsigned long)(( gfp_t)0x20000u), | ||
1776 | "GFP_HARDWALL"}, {(unsigned long)(( gfp_t)0x40000u), "GFP_THISNODE"}, {(unsigned long)(( gfp_t)0x80000u), "GFP_RECLAIMABLE"}, {(unsigned | ||
1777 | long)(( gfp_t)0x08u), "GFP_MOVABLE"}, {(unsigned long)(( gfp_t)0), "GFP_NOTRACK"}, {(unsigned long)(( gfp_t)0x400000u), "GFP_NO_KSWAPD"}, | ||
1778 | {(unsigned long)(( gfp_t)0x800000u), "GFP_OTHER_NODE"} ) : "GFP_NOWAIT" | ||
1779 | </literallayout> | ||
1780 | The 'enable' file in the tracepoint directory is what allows | ||
1781 | the user (or tools such as trace-cmd) to actually turn the | ||
1782 | tracepoint on and off. When enabled, the corresponding | ||
1783 | tracepoint will start appearing in the ftrace 'trace' | ||
1784 | file described previously. For example, this turns on the | ||
1785 | kmalloc tracepoint: | ||
1786 | <literallayout class='monospaced'> | ||
1787 | root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# echo 1 > enable | ||
1788 | </literallayout> | ||
1789 | At the moment, we're not interested in the function tracer or | ||
1790 | some other tracer that might be in effect, so we first turn | ||
1791 | it off, but if we do that, we still need to turn tracing on in | ||
1792 | order to see the events in the output buffer: | ||
1793 | <literallayout class='monospaced'> | ||
1794 | root@sugarbay:/sys/kernel/debug/tracing# echo nop > current_tracer | ||
1795 | root@sugarbay:/sys/kernel/debug/tracing# echo 1 > tracing_on | ||
1796 | </literallayout> | ||
1797 | Now, if we look at the the 'trace' file, we see nothing | ||
1798 | but the kmalloc events we just turned on: | ||
1799 | <literallayout class='monospaced'> | ||
1800 | root@sugarbay:/sys/kernel/debug/tracing# cat trace | less | ||
1801 | # tracer: nop | ||
1802 | # | ||
1803 | # entries-in-buffer/entries-written: 1897/1897 #P:8 | ||
1804 | # | ||
1805 | # _-----=> irqs-off | ||
1806 | # / _----=> need-resched | ||
1807 | # | / _---=> hardirq/softirq | ||
1808 | # || / _--=> preempt-depth | ||
1809 | # ||| / delay | ||
1810 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
1811 | # | | | |||| | | | ||
1812 | dropbear-1465 [000] ...1 18154.620753: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL | ||
1813 | <idle>-0 [000] ..s3 18154.621640: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
1814 | <idle>-0 [000] ..s3 18154.621656: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
1815 | matchbox-termin-1361 [001] ...1 18154.755472: kmalloc: call_site=ffffffff81614050 ptr=ffff88006d5f0e00 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_KERNEL|GFP_REPEAT | ||
1816 | Xorg-1264 [002] ...1 18154.755581: kmalloc: call_site=ffffffff8141abe8 ptr=ffff8800734f4cc0 bytes_req=168 bytes_alloc=192 gfp_flags=GFP_KERNEL|GFP_NOWARN|GFP_NORETRY | ||
1817 | Xorg-1264 [002] ...1 18154.755583: kmalloc: call_site=ffffffff814192a3 ptr=ffff88001f822520 bytes_req=24 bytes_alloc=32 gfp_flags=GFP_KERNEL|GFP_ZERO | ||
1818 | Xorg-1264 [002] ...1 18154.755589: kmalloc: call_site=ffffffff81419edb ptr=ffff8800721a2f00 bytes_req=64 bytes_alloc=64 gfp_flags=GFP_KERNEL|GFP_ZERO | ||
1819 | matchbox-termin-1361 [001] ...1 18155.354594: kmalloc: call_site=ffffffff81614050 ptr=ffff88006db35400 bytes_req=576 bytes_alloc=1024 gfp_flags=GFP_KERNEL|GFP_REPEAT | ||
1820 | Xorg-1264 [002] ...1 18155.354703: kmalloc: call_site=ffffffff8141abe8 ptr=ffff8800734f4cc0 bytes_req=168 bytes_alloc=192 gfp_flags=GFP_KERNEL|GFP_NOWARN|GFP_NORETRY | ||
1821 | Xorg-1264 [002] ...1 18155.354705: kmalloc: call_site=ffffffff814192a3 ptr=ffff88001f822520 bytes_req=24 bytes_alloc=32 gfp_flags=GFP_KERNEL|GFP_ZERO | ||
1822 | Xorg-1264 [002] ...1 18155.354711: kmalloc: call_site=ffffffff81419edb ptr=ffff8800721a2f00 bytes_req=64 bytes_alloc=64 gfp_flags=GFP_KERNEL|GFP_ZERO | ||
1823 | <idle>-0 [000] ..s3 18155.673319: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
1824 | dropbear-1465 [000] ...1 18155.673525: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL | ||
1825 | <idle>-0 [000] ..s3 18155.674821: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
1826 | <idle>-0 [000] ..s3 18155.793014: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
1827 | dropbear-1465 [000] ...1 18155.793219: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL | ||
1828 | <idle>-0 [000] ..s3 18155.794147: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
1829 | <idle>-0 [000] ..s3 18155.936705: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
1830 | dropbear-1465 [000] ...1 18155.936910: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL | ||
1831 | <idle>-0 [000] ..s3 18155.937869: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
1832 | matchbox-termin-1361 [001] ...1 18155.953667: kmalloc: call_site=ffffffff81614050 ptr=ffff88006d5f2000 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_KERNEL|GFP_REPEAT | ||
1833 | Xorg-1264 [002] ...1 18155.953775: kmalloc: call_site=ffffffff8141abe8 ptr=ffff8800734f4cc0 bytes_req=168 bytes_alloc=192 gfp_flags=GFP_KERNEL|GFP_NOWARN|GFP_NORETRY | ||
1834 | Xorg-1264 [002] ...1 18155.953777: kmalloc: call_site=ffffffff814192a3 ptr=ffff88001f822520 bytes_req=24 bytes_alloc=32 gfp_flags=GFP_KERNEL|GFP_ZERO | ||
1835 | Xorg-1264 [002] ...1 18155.953783: kmalloc: call_site=ffffffff81419edb ptr=ffff8800721a2f00 bytes_req=64 bytes_alloc=64 gfp_flags=GFP_KERNEL|GFP_ZERO | ||
1836 | <idle>-0 [000] ..s3 18156.176053: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
1837 | dropbear-1465 [000] ...1 18156.176257: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL | ||
1838 | <idle>-0 [000] ..s3 18156.177717: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
1839 | <idle>-0 [000] ..s3 18156.399229: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d555800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
1840 | dropbear-1465 [000] ...1 18156.399434: kmalloc: call_site=ffffffff816650d4 ptr=ffff8800729c3000 bytes_http://rostedt.homelinux.com/kernelshark/req=2048 bytes_alloc=2048 gfp_flags=GFP_KERNEL | ||
1841 | <idle>-0 [000] ..s3 18156.400660: kmalloc: call_site=ffffffff81619b36 ptr=ffff88006d554800 bytes_req=512 bytes_alloc=512 gfp_flags=GFP_ATOMIC | ||
1842 | matchbox-termin-1361 [001] ...1 18156.552800: kmalloc: call_site=ffffffff81614050 ptr=ffff88006db34800 bytes_req=576 bytes_alloc=1024 gfp_flags=GFP_KERNEL|GFP_REPEAT | ||
1843 | </literallayout> | ||
1844 | To again disable the kmalloc event, we need to send 0 to the | ||
1845 | enable file: | ||
1846 | <literallayout class='monospaced'> | ||
1847 | root@sugarbay:/sys/kernel/debug/tracing/events/kmem/kmalloc# echo 0 > enable | ||
1848 | </literallayout> | ||
1849 | You can enable any number of events or complete subsystems | ||
1850 | (by using the 'enable' file in the subsystem directory) and | ||
1851 | get an arbitrarily fine-grained idea of what's going on in the | ||
1852 | system by enabling as many of the appropriate tracepoints | ||
1853 | as applicable. | ||
1854 | </para> | ||
1855 | |||
1856 | <para> | ||
1857 | A number of the tools described in this HOWTO do just that, | ||
1858 | including trace-cmd and kernelshark in the next section. | ||
1859 | </para> | ||
1860 | |||
1861 | <informalexample> | ||
1862 | <emphasis>Tying it Together:</emphasis> These tracepoints and their representation | ||
1863 | are used not only by ftrace, but by many of the other tools | ||
1864 | covered in this document and they form a central point of | ||
1865 | integration for the various tracers available in Linux. | ||
1866 | They form a central part of the instrumentation for the | ||
1867 | following tools: perf, lttng, ftrace, blktrace and SystemTap | ||
1868 | </informalexample> | ||
1869 | |||
1870 | <informalexample> | ||
1871 | <emphasis>Tying it Together:</emphasis> Eventually all the special-purpose tracers | ||
1872 | currently available in /sys/kernel/debug/tracing will be | ||
1873 | removed and replaced with equivalent tracers based on the | ||
1874 | 'trace events' subsystem. | ||
1875 | </informalexample> | ||
1876 | </section> | ||
1877 | |||
1878 | <section id='trace-cmd-kernelshark'> | ||
1879 | <title>trace-cmd/kernelshark</title> | ||
1880 | |||
1881 | <para> | ||
1882 | trace-cmd is essentially an extensive command-line 'wrapper' | ||
1883 | interface that hides the details of all the individual files | ||
1884 | in /sys/kernel/debug/tracing, allowing users to specify | ||
1885 | specific particular events within the | ||
1886 | /sys/kernel/debug/tracing/events/ subdirectory and to collect | ||
1887 | traces and avoid having to deal with those details directly. | ||
1888 | </para> | ||
1889 | |||
1890 | <para> | ||
1891 | As yet another layer on top of that, kernelshark provides a GUI | ||
1892 | that allows users to start and stop traces and specify sets | ||
1893 | of events using an intuitive interface, and view the | ||
1894 | output as both trace events and as a per-CPU graphical | ||
1895 | display. It directly uses 'trace-cmd' as the plumbing | ||
1896 | that accomplishes all that underneath the covers (and | ||
1897 | actually displays the trace-cmd command it uses, as we'll see). | ||
1898 | </para> | ||
1899 | |||
1900 | <para> | ||
1901 | To start a trace using kernelshark, first start kernelshark: | ||
1902 | <literallayout class='monospaced'> | ||
1903 | root@sugarbay:~# kernelshark | ||
1904 | </literallayout> | ||
1905 | Then bring up the 'Capture' dialog by choosing from the | ||
1906 | kernelshark menu: | ||
1907 | <literallayout class='monospaced'> | ||
1908 | Capture | Record | ||
1909 | </literallayout> | ||
1910 | That will display the following dialog, which allows you to | ||
1911 | choose one or more events (or even one or more complete | ||
1912 | subsystems) to trace: | ||
1913 | </para> | ||
1914 | |||
1915 | <para> | ||
1916 | <imagedata fileref="figures/kernelshark-choose-events.png" width="6in" depth="6in" align="center" scalefit="1" /> | ||
1917 | </para> | ||
1918 | |||
1919 | <para> | ||
1920 | Note that these are exactly the same sets of events described | ||
1921 | in the previous trace events subsystem section, and in fact | ||
1922 | is where trace-cmd gets them for kernelshark. | ||
1923 | </para> | ||
1924 | |||
1925 | <para> | ||
1926 | In the above screenshot, we've decided to explore the | ||
1927 | graphics subsystem a bit and so have chosen to trace all | ||
1928 | the tracepoints contained within the 'i915' and 'drm' | ||
1929 | subsystems. | ||
1930 | </para> | ||
1931 | |||
1932 | <para> | ||
1933 | After doing that, we can start and stop the trace using | ||
1934 | the 'Run' and 'Stop' button on the lower right corner of | ||
1935 | the dialog (the same button will turn into the 'Stop' | ||
1936 | button after the trace has started): | ||
1937 | </para> | ||
1938 | |||
1939 | <para> | ||
1940 | <imagedata fileref="figures/kernelshark-output-display.png" width="6in" depth="6in" align="center" scalefit="1" /> | ||
1941 | </para> | ||
1942 | |||
1943 | <para> | ||
1944 | Notice that the right-hand pane shows the exact trace-cmd | ||
1945 | command-line that's used to run the trace, along with the | ||
1946 | results of the trace-cmd run. | ||
1947 | </para> | ||
1948 | |||
1949 | <para> | ||
1950 | Once the 'Stop' button is pressed, the graphical view magically | ||
1951 | fills up with a colorful per-cpu display of the trace data, | ||
1952 | along with the detailed event listing below that: | ||
1953 | </para> | ||
1954 | |||
1955 | <para> | ||
1956 | <imagedata fileref="figures/kernelshark-i915-display.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
1957 | </para> | ||
1958 | |||
1959 | <para> | ||
1960 | Here's another example, this time a display resulting | ||
1961 | from tracing 'all events': | ||
1962 | </para> | ||
1963 | |||
1964 | <para> | ||
1965 | <imagedata fileref="figures/kernelshark-all.png" width="6in" depth="7in" align="center" scalefit="1" /> | ||
1966 | </para> | ||
1967 | |||
1968 | <para> | ||
1969 | The tool is pretty self-explanatory, but for more detailed | ||
1970 | information on navigating through the data, see the | ||
1971 | <ulink url='http://rostedt.homelinux.com/kernelshark/'>kernelshark website</ulink>. | ||
1972 | </para> | ||
1973 | </section> | ||
1974 | |||
1975 | <section id='ftrace-documentation'> | ||
1976 | <title>Documentation</title> | ||
1977 | |||
1978 | <para> | ||
1979 | The documentation for ftrace can be found in the kernel | ||
1980 | Documentation directory: | ||
1981 | <literallayout class='monospaced'> | ||
1982 | Documentation/trace/ftrace.txt | ||
1983 | </literallayout> | ||
1984 | The documentation for the trace event subsystem can also | ||
1985 | be found in the kernel Documentation directory: | ||
1986 | <literallayout class='monospaced'> | ||
1987 | Documentation/trace/events.txt | ||
1988 | </literallayout> | ||
1989 | There is a nice series of articles on using | ||
1990 | ftrace and trace-cmd at LWN: | ||
1991 | <itemizedlist> | ||
1992 | <listitem><para><ulink url='http://lwn.net/Articles/365835/'>Debugging the kernel using Ftrace - part 1</ulink> | ||
1993 | </para></listitem> | ||
1994 | <listitem><para><ulink url='http://lwn.net/Articles/366796/'>Debugging the kernel using Ftrace - part 2</ulink> | ||
1995 | </para></listitem> | ||
1996 | <listitem><para><ulink url='http://lwn.net/Articles/370423/'>Secrets of the Ftrace function tracer</ulink> | ||
1997 | </para></listitem> | ||
1998 | <listitem><para><ulink url='https://lwn.net/Articles/410200/'>trace-cmd: A front-end for Ftrace</ulink> | ||
1999 | </para></listitem> | ||
2000 | </itemizedlist> | ||
2001 | </para> | ||
2002 | |||
2003 | <para> | ||
2004 | There's more detailed documentation kernelshark usage here: | ||
2005 | <ulink url='http://rostedt.homelinux.com/kernelshark/'>KernelShark</ulink> | ||
2006 | </para> | ||
2007 | |||
2008 | <para> | ||
2009 | An amusing yet useful README (a tracing mini-HOWTO) can be | ||
2010 | found in /sys/kernel/debug/tracing/README. | ||
2011 | </para> | ||
2012 | </section> | ||
2013 | </section> | ||
2014 | |||
2015 | <section id='profile-manual-systemtap'> | ||
2016 | <title>systemtap</title> | ||
2017 | |||
2018 | <para> | ||
2019 | SystemTap is a system-wide script-based tracing and profiling tool. | ||
2020 | </para> | ||
2021 | |||
2022 | <para> | ||
2023 | SystemTap scripts are C-like programs that are executed in the | ||
2024 | kernel to gather/print/aggregate data extracted from the context | ||
2025 | they end up being invoked under. | ||
2026 | </para> | ||
2027 | |||
2028 | <para> | ||
2029 | For example, this probe from the | ||
2030 | <ulink url='http://sourceware.org/systemtap/tutorial/'>SystemTap tutorial</ulink> | ||
2031 | simply prints a line every time any process on the system open()s | ||
2032 | a file. For each line, it prints the executable name of the | ||
2033 | program that opened the file, along with its PID, and the name | ||
2034 | of the file it opened (or tried to open), which it extracts | ||
2035 | from the open syscall's argstr. | ||
2036 | <literallayout class='monospaced'> | ||
2037 | probe syscall.open | ||
2038 | { | ||
2039 | printf ("%s(%d) open (%s)\n", execname(), pid(), argstr) | ||
2040 | } | ||
2041 | |||
2042 | probe timer.ms(4000) # after 4 seconds | ||
2043 | { | ||
2044 | exit () | ||
2045 | } | ||
2046 | </literallayout> | ||
2047 | Normally, to execute this probe, you'd simply install | ||
2048 | systemtap on the system you want to probe, and directly run | ||
2049 | the probe on that system e.g. assuming the name of the file | ||
2050 | containing the above text is trace_open.stp: | ||
2051 | <literallayout class='monospaced'> | ||
2052 | # stap trace_open.stp | ||
2053 | </literallayout> | ||
2054 | What systemtap does under the covers to run this probe is 1) | ||
2055 | parse and convert the probe to an equivalent 'C' form, 2) | ||
2056 | compile the 'C' form into a kernel module, 3) insert the | ||
2057 | module into the kernel, which arms it, and 4) collect the data | ||
2058 | generated by the probe and display it to the user. | ||
2059 | </para> | ||
2060 | |||
2061 | <para> | ||
2062 | In order to accomplish steps 1 and 2, the 'stap' program needs | ||
2063 | access to the kernel build system that produced the kernel | ||
2064 | that the probed system is running. In the case of a typical | ||
2065 | embedded system (the 'target'), the kernel build system | ||
2066 | unfortunately isn't typically part of the image running on | ||
2067 | the target. It is normally available on the 'host' system | ||
2068 | that produced the target image however; in such cases, | ||
2069 | steps 1 and 2 are executed on the host system, and steps | ||
2070 | 3 and 4 are executed on the target system, using only the | ||
2071 | systemtap 'runtime'. | ||
2072 | </para> | ||
2073 | |||
2074 | <para> | ||
2075 | The systemtap support in Yocto assumes that only steps | ||
2076 | 3 and 4 are run on the target; it is possible to do | ||
2077 | everything on the target, but this section assumes only | ||
2078 | the typical embedded use-case. | ||
2079 | </para> | ||
2080 | |||
2081 | <para> | ||
2082 | So basically what you need to do in order to run a systemtap | ||
2083 | script on the target is to 1) on the host system, compile the | ||
2084 | probe into a kernel module that makes sense to the target, 2) | ||
2085 | copy the module onto the target system and 3) insert the | ||
2086 | module into the target kernel, which arms it, and 4) collect | ||
2087 | the data generated by the probe and display it to the user. | ||
2088 | </para> | ||
2089 | |||
2090 | <section id='systemtap-setup'> | ||
2091 | <title>Setup</title> | ||
2092 | |||
2093 | <para> | ||
2094 | Those are a lot of steps and a lot of details, but | ||
2095 | fortunately Yocto includes a script called 'crosstap' | ||
2096 | that will take care of those details, allowing you to | ||
2097 | simply execute a systemtap script on the remote target, | ||
2098 | with arguments if necessary. | ||
2099 | </para> | ||
2100 | |||
2101 | <para> | ||
2102 | In order to do this from a remote host, however, you | ||
2103 | need to have access to the build for the image you | ||
2104 | booted. The 'crosstap' script provides details on how | ||
2105 | to do this if you run the script on the host without having | ||
2106 | done a build: | ||
2107 | <note> | ||
2108 | SystemTap, which uses 'crosstap', assumes you can establish an | ||
2109 | ssh connection to the remote target. | ||
2110 | Please refer to the crosstap wiki page for details on verifying | ||
2111 | ssh connections at | ||
2112 | <ulink url='https://wiki.yoctoproject.org/wiki/Tracing_and_Profiling#systemtap'></ulink>. | ||
2113 | Also, the ability to ssh into the target system is not enabled | ||
2114 | by default in *-minimal images. | ||
2115 | </note> | ||
2116 | <literallayout class='monospaced'> | ||
2117 | $ crosstap root@192.168.1.88 trace_open.stp | ||
2118 | |||
2119 | Error: No target kernel build found. | ||
2120 | Did you forget to create a local build of your image? | ||
2121 | |||
2122 | 'crosstap' requires a local sdk build of the target system | ||
2123 | (or a build that includes 'tools-profile') in order to build | ||
2124 | kernel modules that can probe the target system. | ||
2125 | |||
2126 | Practically speaking, that means you need to do the following: | ||
2127 | - If you're running a pre-built image, download the release | ||
2128 | and/or BSP tarballs used to build the image. | ||
2129 | - If you're working from git sources, just clone the metadata | ||
2130 | and BSP layers needed to build the image you'll be booting. | ||
2131 | - Make sure you're properly set up to build a new image (see | ||
2132 | the BSP README and/or the widely available basic documentation | ||
2133 | that discusses how to build images). | ||
2134 | - Build an -sdk version of the image e.g.: | ||
2135 | $ bitbake core-image-sato-sdk | ||
2136 | OR | ||
2137 | - Build a non-sdk image but include the profiling tools: | ||
2138 | [ edit local.conf and add 'tools-profile' to the end of | ||
2139 | the EXTRA_IMAGE_FEATURES variable ] | ||
2140 | $ bitbake core-image-sato | ||
2141 | |||
2142 | Once you've build the image on the host system, you're ready to | ||
2143 | boot it (or the equivalent pre-built image) and use 'crosstap' | ||
2144 | to probe it (you need to source the environment as usual first): | ||
2145 | |||
2146 | $ source oe-init-build-env | ||
2147 | $ cd ~/my/systemtap/scripts | ||
2148 | $ crosstap root@192.168.1.xxx myscript.stp | ||
2149 | </literallayout> | ||
2150 | So essentially what you need to do is build an SDK image or | ||
2151 | image with 'tools-profile' as detailed in the | ||
2152 | "<link linkend='profile-manual-general-setup'>General Setup</link>" | ||
2153 | section of this manual, and boot the resulting target image. | ||
2154 | </para> | ||
2155 | |||
2156 | <note> | ||
2157 | If you have a build directory containing multiple machines, | ||
2158 | you need to have the MACHINE you're connecting to selected | ||
2159 | in local.conf, and the kernel in that machine's build | ||
2160 | directory must match the kernel on the booted system exactly, | ||
2161 | or you'll get the above 'crosstap' message when you try to | ||
2162 | invoke a script. | ||
2163 | </note> | ||
2164 | </section> | ||
2165 | |||
2166 | <section id='running-a-script-on-a-target'> | ||
2167 | <title>Running a Script on a Target</title> | ||
2168 | |||
2169 | <para> | ||
2170 | Once you've done that, you should be able to run a systemtap | ||
2171 | script on the target: | ||
2172 | <literallayout class='monospaced'> | ||
2173 | $ cd /path/to/yocto | ||
2174 | $ source oe-init-build-env | ||
2175 | |||
2176 | ### Shell environment set up for builds. ### | ||
2177 | |||
2178 | You can now run 'bitbake <target>' | ||
2179 | |||
2180 | Common targets are: | ||
2181 | core-image-minimal | ||
2182 | core-image-sato | ||
2183 | meta-toolchain | ||
2184 | meta-ide-support | ||
2185 | |||
2186 | You can also run generated qemu images with a command like 'runqemu qemux86-64' | ||
2187 | |||
2188 | </literallayout> | ||
2189 | Once you've done that, you can cd to whatever directory | ||
2190 | contains your scripts and use 'crosstap' to run the script: | ||
2191 | <literallayout class='monospaced'> | ||
2192 | $ cd /path/to/my/systemap/script | ||
2193 | $ crosstap root@192.168.7.2 trace_open.stp | ||
2194 | </literallayout> | ||
2195 | If you get an error connecting to the target e.g.: | ||
2196 | <literallayout class='monospaced'> | ||
2197 | $ crosstap root@192.168.7.2 trace_open.stp | ||
2198 | error establishing ssh connection on remote 'root@192.168.7.2' | ||
2199 | </literallayout> | ||
2200 | Try ssh'ing to the target and see what happens: | ||
2201 | <literallayout class='monospaced'> | ||
2202 | $ ssh root@192.168.7.2 | ||
2203 | </literallayout> | ||
2204 | A lot of the time, connection problems are due specifying a | ||
2205 | wrong IP address or having a 'host key verification error'. | ||
2206 | </para> | ||
2207 | |||
2208 | <para> | ||
2209 | If everything worked as planned, you should see something | ||
2210 | like this (enter the password when prompted, or press enter | ||
2211 | if it's set up to use no password): | ||
2212 | <literallayout class='monospaced'> | ||
2213 | $ crosstap root@192.168.7.2 trace_open.stp | ||
2214 | root@192.168.7.2's password: | ||
2215 | matchbox-termin(1036) open ("/tmp/vte3FS2LW", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE, 0600) | ||
2216 | matchbox-termin(1036) open ("/tmp/vteJMC7LW", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE, 0600) | ||
2217 | </literallayout> | ||
2218 | </para> | ||
2219 | </section> | ||
2220 | |||
2221 | <section id='systemtap-documentation'> | ||
2222 | <title>Documentation</title> | ||
2223 | |||
2224 | <para> | ||
2225 | The SystemTap language reference can be found here: | ||
2226 | <ulink url='http://sourceware.org/systemtap/langref/'>SystemTap Language Reference</ulink> | ||
2227 | </para> | ||
2228 | |||
2229 | <para> | ||
2230 | Links to other SystemTap documents, tutorials, and examples can be | ||
2231 | found here: | ||
2232 | <ulink url='http://sourceware.org/systemtap/documentation.html'>SystemTap documentation page</ulink> | ||
2233 | </para> | ||
2234 | </section> | ||
2235 | </section> | ||
2236 | |||
2237 | <section id='profile-manual-sysprof'> | ||
2238 | <title>Sysprof</title> | ||
2239 | |||
2240 | <para> | ||
2241 | Sysprof is a very easy to use system-wide profiler that consists | ||
2242 | of a single window with three panes and a few buttons which allow | ||
2243 | you to start, stop, and view the profile from one place. | ||
2244 | </para> | ||
2245 | |||
2246 | <section id='sysprof-setup'> | ||
2247 | <title>Setup</title> | ||
2248 | |||
2249 | <para> | ||
2250 | For this section, we'll assume you've already performed the | ||
2251 | basic setup outlined in the General Setup section. | ||
2252 | </para> | ||
2253 | |||
2254 | <para> | ||
2255 | Sysprof is a GUI-based application that runs on the target | ||
2256 | system. For the rest of this document we assume you've | ||
2257 | ssh'ed to the host and will be running Sysprof on the | ||
2258 | target (you can use the '-X' option to ssh and have the | ||
2259 | Sysprof GUI run on the target but display remotely on the | ||
2260 | host if you want). | ||
2261 | </para> | ||
2262 | </section> | ||
2263 | |||
2264 | <section id='sysprof-basic-usage'> | ||
2265 | <title>Basic Usage</title> | ||
2266 | |||
2267 | <para> | ||
2268 | To start profiling the system, you simply press the 'Start' | ||
2269 | button. To stop profiling and to start viewing the profile data | ||
2270 | in one easy step, press the 'Profile' button. | ||
2271 | </para> | ||
2272 | |||
2273 | <para> | ||
2274 | Once you've pressed the profile button, the three panes will | ||
2275 | fill up with profiling data: | ||
2276 | </para> | ||
2277 | |||
2278 | <para> | ||
2279 | <imagedata fileref="figures/sysprof-copy-to-user.png" width="6in" depth="4in" align="center" scalefit="1" /> | ||
2280 | </para> | ||
2281 | |||
2282 | <para> | ||
2283 | The left pane shows a list of functions and processes. | ||
2284 | Selecting one of those expands that function in the right | ||
2285 | pane, showing all its callees. Note that this caller-oriented | ||
2286 | display is essentially the inverse of perf's default | ||
2287 | callee-oriented callchain display. | ||
2288 | </para> | ||
2289 | |||
2290 | <para> | ||
2291 | In the screenshot above, we're focusing on __copy_to_user_ll() | ||
2292 | and looking up the callchain we can see that one of the callers | ||
2293 | of __copy_to_user_ll is sys_read() and the complete callpath | ||
2294 | between them. Notice that this is essentially a portion of the | ||
2295 | same information we saw in the perf display shown in the perf | ||
2296 | section of this page. | ||
2297 | </para> | ||
2298 | |||
2299 | <para> | ||
2300 | <imagedata fileref="figures/sysprof-copy-from-user.png" width="6in" depth="4in" align="center" scalefit="1" /> | ||
2301 | </para> | ||
2302 | |||
2303 | <para> | ||
2304 | Similarly, the above is a snapshot of the Sysprof display of a | ||
2305 | copy-from-user callchain. | ||
2306 | </para> | ||
2307 | |||
2308 | <para> | ||
2309 | Finally, looking at the third Sysprof pane in the lower left, | ||
2310 | we can see a list of all the callers of a particular function | ||
2311 | selected in the top left pane. In this case, the lower pane is | ||
2312 | showing all the callers of __mark_inode_dirty: | ||
2313 | </para> | ||
2314 | |||
2315 | <para> | ||
2316 | <imagedata fileref="figures/sysprof-callers.png" width="6in" depth="4in" align="center" scalefit="1" /> | ||
2317 | </para> | ||
2318 | |||
2319 | <para> | ||
2320 | Double-clicking on one of those functions will in turn change the | ||
2321 | focus to the selected function, and so on. | ||
2322 | </para> | ||
2323 | |||
2324 | <informalexample> | ||
2325 | <emphasis>Tying it Together:</emphasis> If you like sysprof's 'caller-oriented' | ||
2326 | display, you may be able to approximate it in other tools as | ||
2327 | well. For example, 'perf report' has the -g (--call-graph) | ||
2328 | option that you can experiment with; one of the options is | ||
2329 | 'caller' for an inverted caller-based callgraph display. | ||
2330 | </informalexample> | ||
2331 | </section> | ||
2332 | |||
2333 | <section id='sysprof-documentation'> | ||
2334 | <title>Documentation</title> | ||
2335 | |||
2336 | <para> | ||
2337 | There doesn't seem to be any documentation for Sysprof, but | ||
2338 | maybe that's because it's pretty self-explanatory. | ||
2339 | The Sysprof website, however, is here: | ||
2340 | <ulink url='http://sysprof.com/'>Sysprof, System-wide Performance Profiler for Linux</ulink> | ||
2341 | </para> | ||
2342 | </section> | ||
2343 | </section> | ||
2344 | |||
2345 | <section id='lttng-linux-trace-toolkit-next-generation'> | ||
2346 | <title>LTTng (Linux Trace Toolkit, next generation)</title> | ||
2347 | |||
2348 | <section id='lttng-setup'> | ||
2349 | <title>Setup</title> | ||
2350 | |||
2351 | <para> | ||
2352 | For this section, we'll assume you've already performed the | ||
2353 | basic setup outlined in the General Setup section. | ||
2354 | LTTng is run on the target system by ssh'ing to it. | ||
2355 | </para> | ||
2356 | </section> | ||
2357 | |||
2358 | <section id='collecting-and-viewing-traces'> | ||
2359 | <title>Collecting and Viewing Traces</title> | ||
2360 | |||
2361 | <para> | ||
2362 | Once you've applied the above commits and built and booted your | ||
2363 | image (you need to build the core-image-sato-sdk image or use one of the | ||
2364 | other methods described in the General Setup section), you're | ||
2365 | ready to start tracing. | ||
2366 | </para> | ||
2367 | |||
2368 | <section id='collecting-and-viewing-a-trace-on-the-target-inside-a-shell'> | ||
2369 | <title>Collecting and viewing a trace on the target (inside a shell)</title> | ||
2370 | |||
2371 | <para> | ||
2372 | First, from the host, ssh to the target: | ||
2373 | <literallayout class='monospaced'> | ||
2374 | $ ssh -l root 192.168.1.47 | ||
2375 | The authenticity of host '192.168.1.47 (192.168.1.47)' can't be established. | ||
2376 | RSA key fingerprint is 23:bd:c8:b1:a8:71:52:00:ee:00:4f:64:9e:10:b9:7e. | ||
2377 | Are you sure you want to continue connecting (yes/no)? yes | ||
2378 | Warning: Permanently added '192.168.1.47' (RSA) to the list of known hosts. | ||
2379 | root@192.168.1.47's password: | ||
2380 | </literallayout> | ||
2381 | Once on the target, use these steps to create a trace: | ||
2382 | <literallayout class='monospaced'> | ||
2383 | root@crownbay:~# lttng create | ||
2384 | Spawning a session daemon | ||
2385 | Session auto-20121015-232120 created. | ||
2386 | Traces will be written in /home/root/lttng-traces/auto-20121015-232120 | ||
2387 | </literallayout> | ||
2388 | Enable the events you want to trace (in this case all | ||
2389 | kernel events): | ||
2390 | <literallayout class='monospaced'> | ||
2391 | root@crownbay:~# lttng enable-event --kernel --all | ||
2392 | All kernel events are enabled in channel channel0 | ||
2393 | </literallayout> | ||
2394 | Start the trace: | ||
2395 | <literallayout class='monospaced'> | ||
2396 | root@crownbay:~# lttng start | ||
2397 | Tracing started for session auto-20121015-232120 | ||
2398 | </literallayout> | ||
2399 | And then stop the trace after awhile or after running | ||
2400 | a particular workload that you want to trace: | ||
2401 | <literallayout class='monospaced'> | ||
2402 | root@crownbay:~# lttng stop | ||
2403 | Tracing stopped for session auto-20121015-232120 | ||
2404 | </literallayout> | ||
2405 | You can now view the trace in text form on the target: | ||
2406 | <literallayout class='monospaced'> | ||
2407 | root@crownbay:~# lttng view | ||
2408 | [23:21:56.989270399] (+?.?????????) sys_geteuid: { 1 }, { } | ||
2409 | [23:21:56.989278081] (+0.000007682) exit_syscall: { 1 }, { ret = 0 } | ||
2410 | [23:21:56.989286043] (+0.000007962) sys_pipe: { 1 }, { fildes = 0xB77B9E8C } | ||
2411 | [23:21:56.989321802] (+0.000035759) exit_syscall: { 1 }, { ret = 0 } | ||
2412 | [23:21:56.989329345] (+0.000007543) sys_mmap_pgoff: { 1 }, { addr = 0x0, len = 10485760, prot = 3, flags = 131362, fd = 4294967295, pgoff = 0 } | ||
2413 | [23:21:56.989351694] (+0.000022349) exit_syscall: { 1 }, { ret = -1247805440 } | ||
2414 | [23:21:56.989432989] (+0.000081295) sys_clone: { 1 }, { clone_flags = 0x411, newsp = 0xB5EFFFE4, parent_tid = 0xFFFFFFFF, child_tid = 0x0 } | ||
2415 | [23:21:56.989477129] (+0.000044140) sched_stat_runtime: { 1 }, { comm = "lttng-consumerd", tid = 1193, runtime = 681660, vruntime = 43367983388 } | ||
2416 | [23:21:56.989486697] (+0.000009568) sched_migrate_task: { 1 }, { comm = "lttng-consumerd", tid = 1193, prio = 20, orig_cpu = 1, dest_cpu = 1 } | ||
2417 | [23:21:56.989508418] (+0.000021721) hrtimer_init: { 1 }, { hrtimer = 3970832076, clockid = 1, mode = 1 } | ||
2418 | [23:21:56.989770462] (+0.000262044) hrtimer_cancel: { 1 }, { hrtimer = 3993865440 } | ||
2419 | [23:21:56.989771580] (+0.000001118) hrtimer_cancel: { 0 }, { hrtimer = 3993812192 } | ||
2420 | [23:21:56.989776957] (+0.000005377) hrtimer_expire_entry: { 1 }, { hrtimer = 3993865440, now = 79815980007057, function = 3238465232 } | ||
2421 | [23:21:56.989778145] (+0.000001188) hrtimer_expire_entry: { 0 }, { hrtimer = 3993812192, now = 79815980008174, function = 3238465232 } | ||
2422 | [23:21:56.989791695] (+0.000013550) softirq_raise: { 1 }, { vec = 1 } | ||
2423 | [23:21:56.989795396] (+0.000003701) softirq_raise: { 0 }, { vec = 1 } | ||
2424 | [23:21:56.989800635] (+0.000005239) softirq_raise: { 0 }, { vec = 9 } | ||
2425 | [23:21:56.989807130] (+0.000006495) sched_stat_runtime: { 1 }, { comm = "lttng-consumerd", tid = 1193, runtime = 330710, vruntime = 43368314098 } | ||
2426 | [23:21:56.989809993] (+0.000002863) sched_stat_runtime: { 0 }, { comm = "lttng-sessiond", tid = 1181, runtime = 1015313, vruntime = 36976733240 } | ||
2427 | [23:21:56.989818514] (+0.000008521) hrtimer_expire_exit: { 0 }, { hrtimer = 3993812192 } | ||
2428 | [23:21:56.989819631] (+0.000001117) hrtimer_expire_exit: { 1 }, { hrtimer = 3993865440 } | ||
2429 | [23:21:56.989821866] (+0.000002235) hrtimer_start: { 0 }, { hrtimer = 3993812192, function = 3238465232, expires = 79815981000000, softexpires = 79815981000000 } | ||
2430 | [23:21:56.989822984] (+0.000001118) hrtimer_start: { 1 }, { hrtimer = 3993865440, function = 3238465232, expires = 79815981000000, softexpires = 79815981000000 } | ||
2431 | [23:21:56.989832762] (+0.000009778) softirq_entry: { 1 }, { vec = 1 } | ||
2432 | [23:21:56.989833879] (+0.000001117) softirq_entry: { 0 }, { vec = 1 } | ||
2433 | [23:21:56.989838069] (+0.000004190) timer_cancel: { 1 }, { timer = 3993871956 } | ||
2434 | [23:21:56.989839187] (+0.000001118) timer_cancel: { 0 }, { timer = 3993818708 } | ||
2435 | [23:21:56.989841492] (+0.000002305) timer_expire_entry: { 1 }, { timer = 3993871956, now = 79515980, function = 3238277552 } | ||
2436 | [23:21:56.989842819] (+0.000001327) timer_expire_entry: { 0 }, { timer = 3993818708, now = 79515980, function = 3238277552 } | ||
2437 | [23:21:56.989854831] (+0.000012012) sched_stat_runtime: { 1 }, { comm = "lttng-consumerd", tid = 1193, runtime = 49237, vruntime = 43368363335 } | ||
2438 | [23:21:56.989855949] (+0.000001118) sched_stat_runtime: { 0 }, { comm = "lttng-sessiond", tid = 1181, runtime = 45121, vruntime = 36976778361 } | ||
2439 | [23:21:56.989861257] (+0.000005308) sched_stat_sleep: { 1 }, { comm = "kworker/1:1", tid = 21, delay = 9451318 } | ||
2440 | [23:21:56.989862374] (+0.000001117) sched_stat_sleep: { 0 }, { comm = "kworker/0:0", tid = 4, delay = 9958820 } | ||
2441 | [23:21:56.989868241] (+0.000005867) sched_wakeup: { 0 }, { comm = "kworker/0:0", tid = 4, prio = 120, success = 1, target_cpu = 0 } | ||
2442 | [23:21:56.989869358] (+0.000001117) sched_wakeup: { 1 }, { comm = "kworker/1:1", tid = 21, prio = 120, success = 1, target_cpu = 1 } | ||
2443 | [23:21:56.989877460] (+0.000008102) timer_expire_exit: { 1 }, { timer = 3993871956 } | ||
2444 | [23:21:56.989878577] (+0.000001117) timer_expire_exit: { 0 }, { timer = 3993818708 } | ||
2445 | . | ||
2446 | . | ||
2447 | . | ||
2448 | </literallayout> | ||
2449 | You can now safely destroy the trace session (note that | ||
2450 | this doesn't delete the trace - it's still there | ||
2451 | in ~/lttng-traces): | ||
2452 | <literallayout class='monospaced'> | ||
2453 | root@crownbay:~# lttng destroy | ||
2454 | Session auto-20121015-232120 destroyed at /home/root | ||
2455 | </literallayout> | ||
2456 | Note that the trace is saved in a directory of the same | ||
2457 | name as returned by 'lttng create', under the ~/lttng-traces | ||
2458 | directory (note that you can change this by supplying your | ||
2459 | own name to 'lttng create'): | ||
2460 | <literallayout class='monospaced'> | ||
2461 | root@crownbay:~# ls -al ~/lttng-traces | ||
2462 | drwxrwx--- 3 root root 1024 Oct 15 23:21 . | ||
2463 | drwxr-xr-x 5 root root 1024 Oct 15 23:57 .. | ||
2464 | drwxrwx--- 3 root root 1024 Oct 15 23:21 auto-20121015-232120 | ||
2465 | </literallayout> | ||
2466 | </para> | ||
2467 | </section> | ||
2468 | |||
2469 | <section id='collecting-and-viewing-a-userspace-trace-on-the-target-inside-a-shell'> | ||
2470 | <title>Collecting and viewing a userspace trace on the target (inside a shell)</title> | ||
2471 | |||
2472 | <para> | ||
2473 | For LTTng userspace tracing, you need to have a properly | ||
2474 | instrumented userspace program. For this example, we'll use | ||
2475 | the 'hello' test program generated by the lttng-ust build. | ||
2476 | </para> | ||
2477 | |||
2478 | <para> | ||
2479 | The 'hello' test program isn't installed on the rootfs by | ||
2480 | the lttng-ust build, so we need to copy it over manually. | ||
2481 | First cd into the build directory that contains the hello | ||
2482 | executable: | ||
2483 | <literallayout class='monospaced'> | ||
2484 | $ cd build/tmp/work/core2_32-poky-linux/lttng-ust/2.0.5-r0/git/tests/hello/.libs | ||
2485 | </literallayout> | ||
2486 | Copy that over to the target machine: | ||
2487 | <literallayout class='monospaced'> | ||
2488 | $ scp hello root@192.168.1.20: | ||
2489 | </literallayout> | ||
2490 | You now have the instrumented lttng 'hello world' test | ||
2491 | program on the target, ready to test. | ||
2492 | </para> | ||
2493 | |||
2494 | <para> | ||
2495 | First, from the host, ssh to the target: | ||
2496 | <literallayout class='monospaced'> | ||
2497 | $ ssh -l root 192.168.1.47 | ||
2498 | The authenticity of host '192.168.1.47 (192.168.1.47)' can't be established. | ||
2499 | RSA key fingerprint is 23:bd:c8:b1:a8:71:52:00:ee:00:4f:64:9e:10:b9:7e. | ||
2500 | Are you sure you want to continue connecting (yes/no)? yes | ||
2501 | Warning: Permanently added '192.168.1.47' (RSA) to the list of known hosts. | ||
2502 | root@192.168.1.47's password: | ||
2503 | </literallayout> | ||
2504 | Once on the target, use these steps to create a trace: | ||
2505 | <literallayout class='monospaced'> | ||
2506 | root@crownbay:~# lttng create | ||
2507 | Session auto-20190303-021943 created. | ||
2508 | Traces will be written in /home/root/lttng-traces/auto-20190303-021943 | ||
2509 | </literallayout> | ||
2510 | Enable the events you want to trace (in this case all | ||
2511 | userspace events): | ||
2512 | <literallayout class='monospaced'> | ||
2513 | root@crownbay:~# lttng enable-event --userspace --all | ||
2514 | All UST events are enabled in channel channel0 | ||
2515 | </literallayout> | ||
2516 | Start the trace: | ||
2517 | <literallayout class='monospaced'> | ||
2518 | root@crownbay:~# lttng start | ||
2519 | Tracing started for session auto-20190303-021943 | ||
2520 | </literallayout> | ||
2521 | Run the instrumented hello world program: | ||
2522 | <literallayout class='monospaced'> | ||
2523 | root@crownbay:~# ./hello | ||
2524 | Hello, World! | ||
2525 | Tracing... done. | ||
2526 | </literallayout> | ||
2527 | And then stop the trace after awhile or after running a | ||
2528 | particular workload that you want to trace: | ||
2529 | <literallayout class='monospaced'> | ||
2530 | root@crownbay:~# lttng stop | ||
2531 | Tracing stopped for session auto-20190303-021943 | ||
2532 | </literallayout> | ||
2533 | You can now view the trace in text form on the target: | ||
2534 | <literallayout class='monospaced'> | ||
2535 | root@crownbay:~# lttng view | ||
2536 | [02:31:14.906146544] (+?.?????????) hello:1424 ust_tests_hello:tptest: { cpu_id = 1 }, { intfield = 0, intfield2 = 0x0, longfield = 0, netintfield = 0, netintfieldhex = 0x0, arrfield1 = [ [0] = 1, [1] = 2, [2] = 3 ], arrfield2 = "test", _seqfield1_length = 4, seqfield1 = [ [0] = 116, [1] = 101, [2] = 115, [3] = 116 ], _seqfield2_length = 4, seqfield2 = "test", stringfield = "test", floatfield = 2222, doublefield = 2, boolfield = 1 } | ||
2537 | [02:31:14.906170360] (+0.000023816) hello:1424 ust_tests_hello:tptest: { cpu_id = 1 }, { intfield = 1, intfield2 = 0x1, longfield = 1, netintfield = 1, netintfieldhex = 0x1, arrfield1 = [ [0] = 1, [1] = 2, [2] = 3 ], arrfield2 = "test", _seqfield1_length = 4, seqfield1 = [ [0] = 116, [1] = 101, [2] = 115, [3] = 116 ], _seqfield2_length = 4, seqfield2 = "test", stringfield = "test", floatfield = 2222, doublefield = 2, boolfield = 1 } | ||
2538 | [02:31:14.906183140] (+0.000012780) hello:1424 ust_tests_hello:tptest: { cpu_id = 1 }, { intfield = 2, intfield2 = 0x2, longfield = 2, netintfield = 2, netintfieldhex = 0x2, arrfield1 = [ [0] = 1, [1] = 2, [2] = 3 ], arrfield2 = "test", _seqfield1_length = 4, seqfield1 = [ [0] = 116, [1] = 101, [2] = 115, [3] = 116 ], _seqfield2_length = 4, seqfield2 = "test", stringfield = "test", floatfield = 2222, doublefield = 2, boolfield = 1 } | ||
2539 | [02:31:14.906194385] (+0.000011245) hello:1424 ust_tests_hello:tptest: { cpu_id = 1 }, { intfield = 3, intfield2 = 0x3, longfield = 3, netintfield = 3, netintfieldhex = 0x3, arrfield1 = [ [0] = 1, [1] = 2, [2] = 3 ], arrfield2 = "test", _seqfield1_length = 4, seqfield1 = [ [0] = 116, [1] = 101, [2] = 115, [3] = 116 ], _seqfield2_length = 4, seqfield2 = "test", stringfield = "test", floatfield = 2222, doublefield = 2, boolfield = 1 } | ||
2540 | . | ||
2541 | . | ||
2542 | . | ||
2543 | </literallayout> | ||
2544 | You can now safely destroy the trace session (note that | ||
2545 | this doesn't delete the trace - it's still | ||
2546 | there in ~/lttng-traces): | ||
2547 | <literallayout class='monospaced'> | ||
2548 | root@crownbay:~# lttng destroy | ||
2549 | Session auto-20190303-021943 destroyed at /home/root | ||
2550 | </literallayout> | ||
2551 | </para> | ||
2552 | </section> | ||
2553 | |||
2554 | </section> | ||
2555 | |||
2556 | <section id='lltng-documentation'> | ||
2557 | <title>Documentation</title> | ||
2558 | |||
2559 | <para> | ||
2560 | You can find the primary LTTng Documentation on the | ||
2561 | <ulink url='https://lttng.org/docs/'>LTTng Documentation</ulink> | ||
2562 | site. | ||
2563 | The documentation on this site is appropriate for intermediate to | ||
2564 | advanced software developers who are working in a Linux environment | ||
2565 | and are interested in efficient software tracing. | ||
2566 | </para> | ||
2567 | |||
2568 | <para> | ||
2569 | For information on LTTng in general, visit the | ||
2570 | <ulink url='http://lttng.org/lttng2.0'>LTTng Project</ulink> | ||
2571 | site. | ||
2572 | You can find a "Getting Started" link on this site that takes | ||
2573 | you to an LTTng Quick Start. | ||
2574 | </para> | ||
2575 | </section> | ||
2576 | </section> | ||
2577 | |||
2578 | <section id='profile-manual-blktrace'> | ||
2579 | <title>blktrace</title> | ||
2580 | |||
2581 | <para> | ||
2582 | blktrace is a tool for tracing and reporting low-level disk I/O. | ||
2583 | blktrace provides the tracing half of the equation; its output can | ||
2584 | be piped into the blkparse program, which renders the data in a | ||
2585 | human-readable form and does some basic analysis: | ||
2586 | </para> | ||
2587 | |||
2588 | <section id='blktrace-setup'> | ||
2589 | <title>Setup</title> | ||
2590 | |||
2591 | <para> | ||
2592 | For this section, we'll assume you've already performed the | ||
2593 | basic setup outlined in the | ||
2594 | "<link linkend='profile-manual-general-setup'>General Setup</link>" | ||
2595 | section. | ||
2596 | </para> | ||
2597 | |||
2598 | <para> | ||
2599 | blktrace is an application that runs on the target system. | ||
2600 | You can run the entire blktrace and blkparse pipeline on the | ||
2601 | target, or you can run blktrace in 'listen' mode on the target | ||
2602 | and have blktrace and blkparse collect and analyze the data on | ||
2603 | the host (see the | ||
2604 | "<link linkend='using-blktrace-remotely'>Using blktrace Remotely</link>" | ||
2605 | section below). | ||
2606 | For the rest of this section we assume you've ssh'ed to the | ||
2607 | host and will be running blkrace on the target. | ||
2608 | </para> | ||
2609 | </section> | ||
2610 | |||
2611 | <section id='blktrace-basic-usage'> | ||
2612 | <title>Basic Usage</title> | ||
2613 | |||
2614 | <para> | ||
2615 | To record a trace, simply run the 'blktrace' command, giving it | ||
2616 | the name of the block device you want to trace activity on: | ||
2617 | <literallayout class='monospaced'> | ||
2618 | root@crownbay:~# blktrace /dev/sdc | ||
2619 | </literallayout> | ||
2620 | In another shell, execute a workload you want to trace. | ||
2621 | <literallayout class='monospaced'> | ||
2622 | root@crownbay:/media/sdc# rm linux-2.6.19.2.tar.bz2; wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink>; sync | ||
2623 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
2624 | linux-2.6.19.2.tar.b 100% |*******************************| 41727k 0:00:00 ETA | ||
2625 | </literallayout> | ||
2626 | Press Ctrl-C in the blktrace shell to stop the trace. It will | ||
2627 | display how many events were logged, along with the per-cpu file | ||
2628 | sizes (blktrace records traces in per-cpu kernel buffers and | ||
2629 | simply dumps them to userspace for blkparse to merge and sort | ||
2630 | later). | ||
2631 | <literallayout class='monospaced'> | ||
2632 | ^C=== sdc === | ||
2633 | CPU 0: 7082 events, 332 KiB data | ||
2634 | CPU 1: 1578 events, 74 KiB data | ||
2635 | Total: 8660 events (dropped 0), 406 KiB data | ||
2636 | </literallayout> | ||
2637 | If you examine the files saved to disk, you see multiple files, | ||
2638 | one per CPU and with the device name as the first part of the | ||
2639 | filename: | ||
2640 | <literallayout class='monospaced'> | ||
2641 | root@crownbay:~# ls -al | ||
2642 | drwxr-xr-x 6 root root 1024 Oct 27 22:39 . | ||
2643 | drwxr-sr-x 4 root root 1024 Oct 26 18:24 .. | ||
2644 | -rw-r--r-- 1 root root 339938 Oct 27 22:40 sdc.blktrace.0 | ||
2645 | -rw-r--r-- 1 root root 75753 Oct 27 22:40 sdc.blktrace.1 | ||
2646 | </literallayout> | ||
2647 | To view the trace events, simply invoke 'blkparse' in the | ||
2648 | directory containing the trace files, giving it the device name | ||
2649 | that forms the first part of the filenames: | ||
2650 | <literallayout class='monospaced'> | ||
2651 | root@crownbay:~# blkparse sdc | ||
2652 | |||
2653 | 8,32 1 1 0.000000000 1225 Q WS 3417048 + 8 [jbd2/sdc-8] | ||
2654 | 8,32 1 2 0.000025213 1225 G WS 3417048 + 8 [jbd2/sdc-8] | ||
2655 | 8,32 1 3 0.000033384 1225 P N [jbd2/sdc-8] | ||
2656 | 8,32 1 4 0.000043301 1225 I WS 3417048 + 8 [jbd2/sdc-8] | ||
2657 | 8,32 1 0 0.000057270 0 m N cfq1225 insert_request | ||
2658 | 8,32 1 0 0.000064813 0 m N cfq1225 add_to_rr | ||
2659 | 8,32 1 5 0.000076336 1225 U N [jbd2/sdc-8] 1 | ||
2660 | 8,32 1 0 0.000088559 0 m N cfq workload slice:150 | ||
2661 | 8,32 1 0 0.000097359 0 m N cfq1225 set_active wl_prio:0 wl_type:1 | ||
2662 | 8,32 1 0 0.000104063 0 m N cfq1225 Not idling. st->count:1 | ||
2663 | 8,32 1 0 0.000112584 0 m N cfq1225 fifo= (null) | ||
2664 | 8,32 1 0 0.000118730 0 m N cfq1225 dispatch_insert | ||
2665 | 8,32 1 0 0.000127390 0 m N cfq1225 dispatched a request | ||
2666 | 8,32 1 0 0.000133536 0 m N cfq1225 activate rq, drv=1 | ||
2667 | 8,32 1 6 0.000136889 1225 D WS 3417048 + 8 [jbd2/sdc-8] | ||
2668 | 8,32 1 7 0.000360381 1225 Q WS 3417056 + 8 [jbd2/sdc-8] | ||
2669 | 8,32 1 8 0.000377422 1225 G WS 3417056 + 8 [jbd2/sdc-8] | ||
2670 | 8,32 1 9 0.000388876 1225 P N [jbd2/sdc-8] | ||
2671 | 8,32 1 10 0.000397886 1225 Q WS 3417064 + 8 [jbd2/sdc-8] | ||
2672 | 8,32 1 11 0.000404800 1225 M WS 3417064 + 8 [jbd2/sdc-8] | ||
2673 | 8,32 1 12 0.000412343 1225 Q WS 3417072 + 8 [jbd2/sdc-8] | ||
2674 | 8,32 1 13 0.000416533 1225 M WS 3417072 + 8 [jbd2/sdc-8] | ||
2675 | 8,32 1 14 0.000422121 1225 Q WS 3417080 + 8 [jbd2/sdc-8] | ||
2676 | 8,32 1 15 0.000425194 1225 M WS 3417080 + 8 [jbd2/sdc-8] | ||
2677 | 8,32 1 16 0.000431968 1225 Q WS 3417088 + 8 [jbd2/sdc-8] | ||
2678 | 8,32 1 17 0.000435251 1225 M WS 3417088 + 8 [jbd2/sdc-8] | ||
2679 | 8,32 1 18 0.000440279 1225 Q WS 3417096 + 8 [jbd2/sdc-8] | ||
2680 | 8,32 1 19 0.000443911 1225 M WS 3417096 + 8 [jbd2/sdc-8] | ||
2681 | 8,32 1 20 0.000450336 1225 Q WS 3417104 + 8 [jbd2/sdc-8] | ||
2682 | 8,32 1 21 0.000454038 1225 M WS 3417104 + 8 [jbd2/sdc-8] | ||
2683 | 8,32 1 22 0.000462070 1225 Q WS 3417112 + 8 [jbd2/sdc-8] | ||
2684 | 8,32 1 23 0.000465422 1225 M WS 3417112 + 8 [jbd2/sdc-8] | ||
2685 | 8,32 1 24 0.000474222 1225 I WS 3417056 + 64 [jbd2/sdc-8] | ||
2686 | 8,32 1 0 0.000483022 0 m N cfq1225 insert_request | ||
2687 | 8,32 1 25 0.000489727 1225 U N [jbd2/sdc-8] 1 | ||
2688 | 8,32 1 0 0.000498457 0 m N cfq1225 Not idling. st->count:1 | ||
2689 | 8,32 1 0 0.000503765 0 m N cfq1225 dispatch_insert | ||
2690 | 8,32 1 0 0.000512914 0 m N cfq1225 dispatched a request | ||
2691 | 8,32 1 0 0.000518851 0 m N cfq1225 activate rq, drv=2 | ||
2692 | . | ||
2693 | . | ||
2694 | . | ||
2695 | 8,32 0 0 58.515006138 0 m N cfq3551 complete rqnoidle 1 | ||
2696 | 8,32 0 2024 58.516603269 3 C WS 3156992 + 16 [0] | ||
2697 | 8,32 0 0 58.516626736 0 m N cfq3551 complete rqnoidle 1 | ||
2698 | 8,32 0 0 58.516634558 0 m N cfq3551 arm_idle: 8 group_idle: 0 | ||
2699 | 8,32 0 0 58.516636933 0 m N cfq schedule dispatch | ||
2700 | 8,32 1 0 58.516971613 0 m N cfq3551 slice expired t=0 | ||
2701 | 8,32 1 0 58.516982089 0 m N cfq3551 sl_used=13 disp=6 charge=13 iops=0 sect=80 | ||
2702 | 8,32 1 0 58.516985511 0 m N cfq3551 del_from_rr | ||
2703 | 8,32 1 0 58.516990819 0 m N cfq3551 put_queue | ||
2704 | |||
2705 | CPU0 (sdc): | ||
2706 | Reads Queued: 0, 0KiB Writes Queued: 331, 26,284KiB | ||
2707 | Read Dispatches: 0, 0KiB Write Dispatches: 485, 40,484KiB | ||
2708 | Reads Requeued: 0 Writes Requeued: 0 | ||
2709 | Reads Completed: 0, 0KiB Writes Completed: 511, 41,000KiB | ||
2710 | Read Merges: 0, 0KiB Write Merges: 13, 160KiB | ||
2711 | Read depth: 0 Write depth: 2 | ||
2712 | IO unplugs: 23 Timer unplugs: 0 | ||
2713 | CPU1 (sdc): | ||
2714 | Reads Queued: 0, 0KiB Writes Queued: 249, 15,800KiB | ||
2715 | Read Dispatches: 0, 0KiB Write Dispatches: 42, 1,600KiB | ||
2716 | Reads Requeued: 0 Writes Requeued: 0 | ||
2717 | Reads Completed: 0, 0KiB Writes Completed: 16, 1,084KiB | ||
2718 | Read Merges: 0, 0KiB Write Merges: 40, 276KiB | ||
2719 | Read depth: 0 Write depth: 2 | ||
2720 | IO unplugs: 30 Timer unplugs: 1 | ||
2721 | |||
2722 | Total (sdc): | ||
2723 | Reads Queued: 0, 0KiB Writes Queued: 580, 42,084KiB | ||
2724 | Read Dispatches: 0, 0KiB Write Dispatches: 527, 42,084KiB | ||
2725 | Reads Requeued: 0 Writes Requeued: 0 | ||
2726 | Reads Completed: 0, 0KiB Writes Completed: 527, 42,084KiB | ||
2727 | Read Merges: 0, 0KiB Write Merges: 53, 436KiB | ||
2728 | IO unplugs: 53 Timer unplugs: 1 | ||
2729 | |||
2730 | Throughput (R/W): 0KiB/s / 719KiB/s | ||
2731 | Events (sdc): 6,592 entries | ||
2732 | Skips: 0 forward (0 - 0.0%) | ||
2733 | Input file sdc.blktrace.0 added | ||
2734 | Input file sdc.blktrace.1 added | ||
2735 | </literallayout> | ||
2736 | The report shows each event that was found in the blktrace data, | ||
2737 | along with a summary of the overall block I/O traffic during | ||
2738 | the run. You can look at the | ||
2739 | <ulink url='http://linux.die.net/man/1/blkparse'>blkparse</ulink> | ||
2740 | manpage to learn the | ||
2741 | meaning of each field displayed in the trace listing. | ||
2742 | </para> | ||
2743 | |||
2744 | <section id='blktrace-live-mode'> | ||
2745 | <title>Live Mode</title> | ||
2746 | |||
2747 | <para> | ||
2748 | blktrace and blkparse are designed from the ground up to | ||
2749 | be able to operate together in a 'pipe mode' where the | ||
2750 | stdout of blktrace can be fed directly into the stdin of | ||
2751 | blkparse: | ||
2752 | <literallayout class='monospaced'> | ||
2753 | root@crownbay:~# blktrace /dev/sdc -o - | blkparse -i - | ||
2754 | </literallayout> | ||
2755 | This enables long-lived tracing sessions to run without | ||
2756 | writing anything to disk, and allows the user to look for | ||
2757 | certain conditions in the trace data in 'real-time' by | ||
2758 | viewing the trace output as it scrolls by on the screen or | ||
2759 | by passing it along to yet another program in the pipeline | ||
2760 | such as grep which can be used to identify and capture | ||
2761 | conditions of interest. | ||
2762 | </para> | ||
2763 | |||
2764 | <para> | ||
2765 | There's actually another blktrace command that implements | ||
2766 | the above pipeline as a single command, so the user doesn't | ||
2767 | have to bother typing in the above command sequence: | ||
2768 | <literallayout class='monospaced'> | ||
2769 | root@crownbay:~# btrace /dev/sdc | ||
2770 | </literallayout> | ||
2771 | </para> | ||
2772 | </section> | ||
2773 | |||
2774 | <section id='using-blktrace-remotely'> | ||
2775 | <title>Using blktrace Remotely</title> | ||
2776 | |||
2777 | <para> | ||
2778 | Because blktrace traces block I/O and at the same time | ||
2779 | normally writes its trace data to a block device, and | ||
2780 | in general because it's not really a great idea to make | ||
2781 | the device being traced the same as the device the tracer | ||
2782 | writes to, blktrace provides a way to trace without | ||
2783 | perturbing the traced device at all by providing native | ||
2784 | support for sending all trace data over the network. | ||
2785 | </para> | ||
2786 | |||
2787 | <para> | ||
2788 | To have blktrace operate in this mode, start blktrace on | ||
2789 | the target system being traced with the -l option, along with | ||
2790 | the device to trace: | ||
2791 | <literallayout class='monospaced'> | ||
2792 | root@crownbay:~# blktrace -l /dev/sdc | ||
2793 | server: waiting for connections... | ||
2794 | </literallayout> | ||
2795 | On the host system, use the -h option to connect to the | ||
2796 | target system, also passing it the device to trace: | ||
2797 | <literallayout class='monospaced'> | ||
2798 | $ blktrace -d /dev/sdc -h 192.168.1.43 | ||
2799 | blktrace: connecting to 192.168.1.43 | ||
2800 | blktrace: connected! | ||
2801 | </literallayout> | ||
2802 | On the target system, you should see this: | ||
2803 | <literallayout class='monospaced'> | ||
2804 | server: connection from 192.168.1.43 | ||
2805 | </literallayout> | ||
2806 | In another shell, execute a workload you want to trace. | ||
2807 | <literallayout class='monospaced'> | ||
2808 | root@crownbay:/media/sdc# rm linux-2.6.19.2.tar.bz2; wget <ulink url='http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2'>http://downloads.yoctoproject.org/mirror/sources/linux-2.6.19.2.tar.bz2</ulink>; sync | ||
2809 | Connecting to downloads.yoctoproject.org (140.211.169.59:80) | ||
2810 | linux-2.6.19.2.tar.b 100% |*******************************| 41727k 0:00:00 ETA | ||
2811 | </literallayout> | ||
2812 | When it's done, do a Ctrl-C on the host system to | ||
2813 | stop the trace: | ||
2814 | <literallayout class='monospaced'> | ||
2815 | ^C=== sdc === | ||
2816 | CPU 0: 7691 events, 361 KiB data | ||
2817 | CPU 1: 4109 events, 193 KiB data | ||
2818 | Total: 11800 events (dropped 0), 554 KiB data | ||
2819 | </literallayout> | ||
2820 | On the target system, you should also see a trace | ||
2821 | summary for the trace just ended: | ||
2822 | <literallayout class='monospaced'> | ||
2823 | server: end of run for 192.168.1.43:sdc | ||
2824 | === sdc === | ||
2825 | CPU 0: 7691 events, 361 KiB data | ||
2826 | CPU 1: 4109 events, 193 KiB data | ||
2827 | Total: 11800 events (dropped 0), 554 KiB data | ||
2828 | </literallayout> | ||
2829 | The blktrace instance on the host will save the target | ||
2830 | output inside a hostname-timestamp directory: | ||
2831 | <literallayout class='monospaced'> | ||
2832 | $ ls -al | ||
2833 | drwxr-xr-x 10 root root 1024 Oct 28 02:40 . | ||
2834 | drwxr-sr-x 4 root root 1024 Oct 26 18:24 .. | ||
2835 | drwxr-xr-x 2 root root 1024 Oct 28 02:40 192.168.1.43-2012-10-28-02:40:56 | ||
2836 | </literallayout> | ||
2837 | cd into that directory to see the output files: | ||
2838 | <literallayout class='monospaced'> | ||
2839 | $ ls -l | ||
2840 | -rw-r--r-- 1 root root 369193 Oct 28 02:44 sdc.blktrace.0 | ||
2841 | -rw-r--r-- 1 root root 197278 Oct 28 02:44 sdc.blktrace.1 | ||
2842 | </literallayout> | ||
2843 | And run blkparse on the host system using the device name: | ||
2844 | <literallayout class='monospaced'> | ||
2845 | $ blkparse sdc | ||
2846 | |||
2847 | 8,32 1 1 0.000000000 1263 Q RM 6016 + 8 [ls] | ||
2848 | 8,32 1 0 0.000036038 0 m N cfq1263 alloced | ||
2849 | 8,32 1 2 0.000039390 1263 G RM 6016 + 8 [ls] | ||
2850 | 8,32 1 3 0.000049168 1263 I RM 6016 + 8 [ls] | ||
2851 | 8,32 1 0 0.000056152 0 m N cfq1263 insert_request | ||
2852 | 8,32 1 0 0.000061600 0 m N cfq1263 add_to_rr | ||
2853 | 8,32 1 0 0.000075498 0 m N cfq workload slice:300 | ||
2854 | . | ||
2855 | . | ||
2856 | . | ||
2857 | 8,32 0 0 177.266385696 0 m N cfq1267 arm_idle: 8 group_idle: 0 | ||
2858 | 8,32 0 0 177.266388140 0 m N cfq schedule dispatch | ||
2859 | 8,32 1 0 177.266679239 0 m N cfq1267 slice expired t=0 | ||
2860 | 8,32 1 0 177.266689297 0 m N cfq1267 sl_used=9 disp=6 charge=9 iops=0 sect=56 | ||
2861 | 8,32 1 0 177.266692649 0 m N cfq1267 del_from_rr | ||
2862 | 8,32 1 0 177.266696560 0 m N cfq1267 put_queue | ||
2863 | |||
2864 | CPU0 (sdc): | ||
2865 | Reads Queued: 0, 0KiB Writes Queued: 270, 21,708KiB | ||
2866 | Read Dispatches: 59, 2,628KiB Write Dispatches: 495, 39,964KiB | ||
2867 | Reads Requeued: 0 Writes Requeued: 0 | ||
2868 | Reads Completed: 90, 2,752KiB Writes Completed: 543, 41,596KiB | ||
2869 | Read Merges: 0, 0KiB Write Merges: 9, 344KiB | ||
2870 | Read depth: 2 Write depth: 2 | ||
2871 | IO unplugs: 20 Timer unplugs: 1 | ||
2872 | CPU1 (sdc): | ||
2873 | Reads Queued: 688, 2,752KiB Writes Queued: 381, 20,652KiB | ||
2874 | Read Dispatches: 31, 124KiB Write Dispatches: 59, 2,396KiB | ||
2875 | Reads Requeued: 0 Writes Requeued: 0 | ||
2876 | Reads Completed: 0, 0KiB Writes Completed: 11, 764KiB | ||
2877 | Read Merges: 598, 2,392KiB Write Merges: 88, 448KiB | ||
2878 | Read depth: 2 Write depth: 2 | ||
2879 | IO unplugs: 52 Timer unplugs: 0 | ||
2880 | |||
2881 | Total (sdc): | ||
2882 | Reads Queued: 688, 2,752KiB Writes Queued: 651, 42,360KiB | ||
2883 | Read Dispatches: 90, 2,752KiB Write Dispatches: 554, 42,360KiB | ||
2884 | Reads Requeued: 0 Writes Requeued: 0 | ||
2885 | Reads Completed: 90, 2,752KiB Writes Completed: 554, 42,360KiB | ||
2886 | Read Merges: 598, 2,392KiB Write Merges: 97, 792KiB | ||
2887 | IO unplugs: 72 Timer unplugs: 1 | ||
2888 | |||
2889 | Throughput (R/W): 15KiB/s / 238KiB/s | ||
2890 | Events (sdc): 9,301 entries | ||
2891 | Skips: 0 forward (0 - 0.0%) | ||
2892 | </literallayout> | ||
2893 | You should see the trace events and summary just as | ||
2894 | you would have if you'd run the same command on the target. | ||
2895 | </para> | ||
2896 | </section> | ||
2897 | |||
2898 | <section id='tracing-block-io-via-ftrace'> | ||
2899 | <title>Tracing Block I/O via 'ftrace'</title> | ||
2900 | |||
2901 | <para> | ||
2902 | It's also possible to trace block I/O using only | ||
2903 | <link linkend='the-trace-events-subsystem'>trace events subsystem</link>, | ||
2904 | which can be useful for casual tracing | ||
2905 | if you don't want to bother dealing with the userspace tools. | ||
2906 | </para> | ||
2907 | |||
2908 | <para> | ||
2909 | To enable tracing for a given device, use | ||
2910 | /sys/block/xxx/trace/enable, where xxx is the device name. | ||
2911 | This for example enables tracing for /dev/sdc: | ||
2912 | <literallayout class='monospaced'> | ||
2913 | root@crownbay:/sys/kernel/debug/tracing# echo 1 > /sys/block/sdc/trace/enable | ||
2914 | </literallayout> | ||
2915 | Once you've selected the device(s) you want to trace, | ||
2916 | selecting the 'blk' tracer will turn the blk tracer on: | ||
2917 | <literallayout class='monospaced'> | ||
2918 | root@crownbay:/sys/kernel/debug/tracing# cat available_tracers | ||
2919 | blk function_graph function nop | ||
2920 | |||
2921 | root@crownbay:/sys/kernel/debug/tracing# echo blk > current_tracer | ||
2922 | </literallayout> | ||
2923 | Execute the workload you're interested in: | ||
2924 | <literallayout class='monospaced'> | ||
2925 | root@crownbay:/sys/kernel/debug/tracing# cat /media/sdc/testfile.txt | ||
2926 | </literallayout> | ||
2927 | And look at the output (note here that we're using | ||
2928 | 'trace_pipe' instead of trace to capture this trace - | ||
2929 | this allows us to wait around on the pipe for data to | ||
2930 | appear): | ||
2931 | <literallayout class='monospaced'> | ||
2932 | root@crownbay:/sys/kernel/debug/tracing# cat trace_pipe | ||
2933 | cat-3587 [001] d..1 3023.276361: 8,32 Q R 1699848 + 8 [cat] | ||
2934 | cat-3587 [001] d..1 3023.276410: 8,32 m N cfq3587 alloced | ||
2935 | cat-3587 [001] d..1 3023.276415: 8,32 G R 1699848 + 8 [cat] | ||
2936 | cat-3587 [001] d..1 3023.276424: 8,32 P N [cat] | ||
2937 | cat-3587 [001] d..2 3023.276432: 8,32 I R 1699848 + 8 [cat] | ||
2938 | cat-3587 [001] d..1 3023.276439: 8,32 m N cfq3587 insert_request | ||
2939 | cat-3587 [001] d..1 3023.276445: 8,32 m N cfq3587 add_to_rr | ||
2940 | cat-3587 [001] d..2 3023.276454: 8,32 U N [cat] 1 | ||
2941 | cat-3587 [001] d..1 3023.276464: 8,32 m N cfq workload slice:150 | ||
2942 | cat-3587 [001] d..1 3023.276471: 8,32 m N cfq3587 set_active wl_prio:0 wl_type:2 | ||
2943 | cat-3587 [001] d..1 3023.276478: 8,32 m N cfq3587 fifo= (null) | ||
2944 | cat-3587 [001] d..1 3023.276483: 8,32 m N cfq3587 dispatch_insert | ||
2945 | cat-3587 [001] d..1 3023.276490: 8,32 m N cfq3587 dispatched a request | ||
2946 | cat-3587 [001] d..1 3023.276497: 8,32 m N cfq3587 activate rq, drv=1 | ||
2947 | cat-3587 [001] d..2 3023.276500: 8,32 D R 1699848 + 8 [cat] | ||
2948 | </literallayout> | ||
2949 | And this turns off tracing for the specified device: | ||
2950 | <literallayout class='monospaced'> | ||
2951 | root@crownbay:/sys/kernel/debug/tracing# echo 0 > /sys/block/sdc/trace/enable | ||
2952 | </literallayout> | ||
2953 | </para> | ||
2954 | </section> | ||
2955 | </section> | ||
2956 | |||
2957 | <section id='blktrace-documentation'> | ||
2958 | <title>Documentation</title> | ||
2959 | |||
2960 | <para> | ||
2961 | Online versions of the man pages for the commands discussed | ||
2962 | in this section can be found here: | ||
2963 | <itemizedlist> | ||
2964 | <listitem><para><ulink url='http://linux.die.net/man/8/blktrace'>http://linux.die.net/man/8/blktrace</ulink> | ||
2965 | </para></listitem> | ||
2966 | <listitem><para><ulink url='http://linux.die.net/man/1/blkparse'>http://linux.die.net/man/1/blkparse</ulink> | ||
2967 | </para></listitem> | ||
2968 | <listitem><para><ulink url='http://linux.die.net/man/8/btrace'>http://linux.die.net/man/8/btrace</ulink> | ||
2969 | </para></listitem> | ||
2970 | </itemizedlist> | ||
2971 | </para> | ||
2972 | |||
2973 | <para> | ||
2974 | The above manpages, along with manpages for the other | ||
2975 | blktrace utilities (btt, blkiomon, etc) can be found in the | ||
2976 | /doc directory of the blktrace tools git repo: | ||
2977 | <literallayout class='monospaced'> | ||
2978 | $ git clone git://git.kernel.dk/blktrace.git | ||
2979 | </literallayout> | ||
2980 | </para> | ||
2981 | </section> | ||
2982 | </section> | ||
2983 | </chapter> | ||
2984 | <!-- | ||
2985 | vim: expandtab tw=80 ts=4 | ||
2986 | --> | ||
diff --git a/documentation/profile-manual/profile-manual.xml b/documentation/profile-manual/profile-manual.xml deleted file mode 100755 index 48bfba5b8f..0000000000 --- a/documentation/profile-manual/profile-manual.xml +++ /dev/null | |||
@@ -1,180 +0,0 @@ | |||
1 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" | ||
2 | "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" | ||
3 | [<!ENTITY % poky SYSTEM "../poky.ent"> %poky; ] > | ||
4 | <!--SPDX-License-Identifier: CC-BY-2.0-UK--> | ||
5 | |||
6 | <book id='profile-manual' lang='en' | ||
7 | xmlns:xi="http://www.w3.org/2003/XInclude" | ||
8 | xmlns="http://docbook.org/ns/docbook" | ||
9 | > | ||
10 | <bookinfo> | ||
11 | |||
12 | <mediaobject> | ||
13 | <imageobject> | ||
14 | <imagedata fileref='figures/profile-title.png' | ||
15 | format='SVG' | ||
16 | align='left' scalefit='1' width='100%'/> | ||
17 | </imageobject> | ||
18 | </mediaobject> | ||
19 | |||
20 | <title> | ||
21 | Yocto Project Profiling and Tracing Manual | ||
22 | </title> | ||
23 | |||
24 | <authorgroup> | ||
25 | <author> | ||
26 | <affiliation> | ||
27 | <orgname>&ORGNAME;</orgname> | ||
28 | </affiliation> | ||
29 | <email>&ORGEMAIL;</email> | ||
30 | </author> | ||
31 | </authorgroup> | ||
32 | |||
33 | <revhistory> | ||
34 | <revision> | ||
35 | <revnumber>1.4</revnumber> | ||
36 | <date>April 2013</date> | ||
37 | <revremark>The initial document released with the Yocto Project 1.4 Release.</revremark> | ||
38 | </revision> | ||
39 | <revision> | ||
40 | <revnumber>1.5</revnumber> | ||
41 | <date>October 2013</date> | ||
42 | <revremark>Released with the Yocto Project 1.5 Release.</revremark> | ||
43 | </revision> | ||
44 | <revision> | ||
45 | <revnumber>1.6</revnumber> | ||
46 | <date>April 2014</date> | ||
47 | <revremark>Released with the Yocto Project 1.6 Release.</revremark> | ||
48 | </revision> | ||
49 | <revision> | ||
50 | <revnumber>1.7</revnumber> | ||
51 | <date>October 2014</date> | ||
52 | <revremark>Released with the Yocto Project 1.7 Release.</revremark> | ||
53 | </revision> | ||
54 | <revision> | ||
55 | <revnumber>1.8</revnumber> | ||
56 | <date>April 2015</date> | ||
57 | <revremark>Released with the Yocto Project 1.8 Release.</revremark> | ||
58 | </revision> | ||
59 | <revision> | ||
60 | <revnumber>2.0</revnumber> | ||
61 | <date>October 2015</date> | ||
62 | <revremark>Released with the Yocto Project 2.0 Release.</revremark> | ||
63 | </revision> | ||
64 | <revision> | ||
65 | <revnumber>2.1</revnumber> | ||
66 | <date>April 2016</date> | ||
67 | <revremark>Released with the Yocto Project 2.1 Release.</revremark> | ||
68 | </revision> | ||
69 | <revision> | ||
70 | <revnumber>2.2</revnumber> | ||
71 | <date>October 2016</date> | ||
72 | <revremark>Released with the Yocto Project 2.2 Release.</revremark> | ||
73 | </revision> | ||
74 | <revision> | ||
75 | <revnumber>2.3</revnumber> | ||
76 | <date>May 2017</date> | ||
77 | <revremark>Released with the Yocto Project 2.3 Release.</revremark> | ||
78 | </revision> | ||
79 | <revision> | ||
80 | <revnumber>2.4</revnumber> | ||
81 | <date>October 2017</date> | ||
82 | <revremark>Released with the Yocto Project 2.4 Release.</revremark> | ||
83 | </revision> | ||
84 | <revision> | ||
85 | <revnumber>2.5</revnumber> | ||
86 | <date>May 2018</date> | ||
87 | <revremark>Released with the Yocto Project 2.5 Release.</revremark> | ||
88 | </revision> | ||
89 | <revision> | ||
90 | <revnumber>2.6</revnumber> | ||
91 | <date>November 2018</date> | ||
92 | <revremark>Released with the Yocto Project 2.6 Release.</revremark> | ||
93 | </revision> | ||
94 | <revision> | ||
95 | <revnumber>2.7</revnumber> | ||
96 | <date>May 2019</date> | ||
97 | <revremark>Released with the Yocto Project 2.7 Release.</revremark> | ||
98 | </revision> | ||
99 | <revision> | ||
100 | <revnumber>3.0</revnumber> | ||
101 | <date>October 2019</date> | ||
102 | <revremark>Released with the Yocto Project 3.0 Release.</revremark> | ||
103 | </revision> | ||
104 | <revision> | ||
105 | <revnumber>3.1</revnumber> | ||
106 | <date>&REL_MONTH_YEAR;</date> | ||
107 | <revremark>Released with the Yocto Project 3.1 Release.</revremark> | ||
108 | </revision> | ||
109 | </revhistory> | ||
110 | |||
111 | <copyright> | ||
112 | <year>©RIGHT_YEAR;</year> | ||
113 | <holder>Linux Foundation</holder> | ||
114 | </copyright> | ||
115 | |||
116 | <legalnotice> | ||
117 | <para> | ||
118 | Permission is granted to copy, distribute and/or modify this document under | ||
119 | the terms of the <ulink type="http" url="http://creativecommons.org/licenses/by-sa/2.0/uk/"> | ||
120 | Creative Commons Attribution-Share Alike 2.0 UK: England & Wales</ulink> as published by | ||
121 | Creative Commons. | ||
122 | </para> | ||
123 | <note><title>Manual Notes</title> | ||
124 | <itemizedlist> | ||
125 | <listitem><para> | ||
126 | This version of the | ||
127 | <emphasis>Yocto Project Profiling and Tracing Manual</emphasis> | ||
128 | is for the &YOCTO_DOC_VERSION; release of the | ||
129 | Yocto Project. | ||
130 | To be sure you have the latest version of the manual | ||
131 | for this release, go to the | ||
132 | <ulink url='&YOCTO_DOCS_URL;'>Yocto Project documentation page</ulink> | ||
133 | and select the manual from that site. | ||
134 | Manuals from the site are more up-to-date than manuals | ||
135 | derived from the Yocto Project released TAR files. | ||
136 | </para></listitem> | ||
137 | <listitem><para> | ||
138 | If you located this manual through a web search, the | ||
139 | version of the manual might not be the one you want | ||
140 | (e.g. the search might have returned a manual much | ||
141 | older than the Yocto Project version with which you | ||
142 | are working). | ||
143 | You can see all Yocto Project major releases by | ||
144 | visiting the | ||
145 | <ulink url='&YOCTO_WIKI_URL;/wiki/Releases'>Releases</ulink> | ||
146 | page. | ||
147 | If you need a version of this manual for a different | ||
148 | Yocto Project release, visit the | ||
149 | <ulink url='&YOCTO_DOCS_URL;'>Yocto Project documentation page</ulink> | ||
150 | and select the manual set by using the | ||
151 | "ACTIVE RELEASES DOCUMENTATION" or "DOCUMENTS ARCHIVE" | ||
152 | pull-down menus. | ||
153 | </para></listitem> | ||
154 | <listitem> | ||
155 | <para> | ||
156 | To report any inaccuracies or problems with this | ||
157 | (or any other Yocto Project) manual, send an email to | ||
158 | the Yocto Project documentation mailing list at | ||
159 | <filename>docs@lists.yoctoproject.org</filename> or | ||
160 | log into the freenode <filename>#yocto</filename> channel. | ||
161 | </para> | ||
162 | </listitem> | ||
163 | </itemizedlist> | ||
164 | </note> | ||
165 | </legalnotice> | ||
166 | |||
167 | </bookinfo> | ||
168 | |||
169 | <xi:include href="profile-manual-intro.xml"/> | ||
170 | |||
171 | <xi:include href="profile-manual-arch.xml"/> | ||
172 | |||
173 | <xi:include href="profile-manual-usage.xml"/> | ||
174 | |||
175 | <xi:include href="profile-manual-examples.xml"/> | ||
176 | |||
177 | </book> | ||
178 | <!-- | ||
179 | vim: expandtab tw=80 ts=4 | ||
180 | --> | ||