diff options
author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2013-03-14 17:26:20 +0000 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2013-03-18 21:26:40 +0000 |
commit | add11fa1abfc51f1a793c84f02cd7d85d793fe14 (patch) | |
tree | e58176a867d272a57d91f61e2f954fc0249d007e /meta/lib | |
parent | cec0102647e3f2c93de5125c1de2436b4b787bdd (diff) | |
download | poky-add11fa1abfc51f1a793c84f02cd7d85d793fe14.tar.gz |
package: Add cachedpath optimisation
Currently, various standard library operations like os.walk(),
os.path.isdir() and os.path.islink() each call stat or lstat which
involves a syscall into the kernel. There is no caching since they could
conceivably have changed on disk. The result is that for something like
the do_package task of the kernel we're spending over two minutes making
868,000 individual stat calls for 23,000 files. This is suboptimal.
This patch adds lib/oe/cachedpath.py, which provides a set of replacement
functions for these operations that use cached stat data rather than
hitting the kernel each time. It gives a nice performance improvement
halving the build time of the kernel do_package.
(From OE-Core rev: 556dee0c4d6d8a87c0cddbd2f60fe5917d009f18)
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'meta/lib')
-rw-r--r-- | meta/lib/oe/cachedpath.py | 235 |
1 files changed, 235 insertions, 0 deletions
diff --git a/meta/lib/oe/cachedpath.py b/meta/lib/oe/cachedpath.py new file mode 100644 index 0000000000..e350c8a70e --- /dev/null +++ b/meta/lib/oe/cachedpath.py | |||
@@ -0,0 +1,235 @@ | |||
1 | # | ||
2 | # Based on standard python library functions but avoid | ||
3 | # repeated stat calls. It's assumed the files will not change from under us | ||
4 | # so we can cache stat calls. | ||
5 | # | ||
6 | |||
7 | import os | ||
8 | import errno | ||
9 | import stat as statmod | ||
10 | |||
class CachedPath(object):
    """Replacements for common os / os.path queries that cache stat results.

    Results of os.stat() and os.lstat() are remembered per normalised path,
    so repeated isdir()/islink()/exists()/walk() queries on the same path do
    not hit the kernel again. A failed stat is cached as False. The cache
    assumes files do not change underneath it; call updatecache() for a path
    after modifying it on disk.
    """

    def __init__(self):
        # normalised path -> os.stat() result, or False if the call failed
        self.statcache = {}
        # normalised path -> os.lstat() result, or False if the call failed
        self.lstatcache = {}
        # path as given -> os.path.normpath(path)
        self.normpathcache = {}

    def updatecache(self, x):
        """Drop any cached stat and lstat result for path 'x'."""
        x = self.normpath(x)
        self.statcache.pop(x, None)
        self.lstatcache.pop(x, None)

    def normpath(self, path):
        """Return os.path.normpath(path), memoising the result."""
        if path in self.normpathcache:
            return self.normpathcache[path]
        newpath = os.path.normpath(path)
        self.normpathcache[path] = newpath
        return newpath

    def _callstat(self, path):
        """Return the cached os.stat() result for 'path', or False on error.

        'path' is used as the cache key as-is; callers are expected to have
        normalised it already.
        """
        if path in self.statcache:
            return self.statcache[path]
        try:
            st = os.stat(path)
            self.statcache[path] = st
            return st
        except os.error:
            self.statcache[path] = False
            return False

    # We might as well call lstat and then only
    # call stat as well in the symbolic link case
    # since this turns out to be much more optimal
    # in real world usage of this cache
    def callstat(self, path):
        """Return the stat result (following symlinks) for 'path', or False."""
        path = self.normpath(path)
        # calllstat() always fills statcache[path] as a side effect
        self.calllstat(path)
        return self.statcache[path]

    def calllstat(self, path):
        """Return the lstat result (not following symlinks) for 'path', or False.

        Also populates the stat cache: for anything that is not a symlink the
        lstat result is reused, for symlinks a real os.stat() is issued.
        """
        path = self.normpath(path)
        if path in self.lstatcache:
            return self.lstatcache[path]
        try:
            lst = os.lstat(path)
            self.lstatcache[path] = lst
            if not statmod.S_ISLNK(lst.st_mode):
                # Not a link, so stat() and lstat() are the same thing
                self.statcache[path] = lst
            else:
                self._callstat(path)
            return lst
        except (os.error, AttributeError):
            self.lstatcache[path] = False
            self.statcache[path] = False
            return False

    # This follows symbolic links, so both islink() and isfile() can be true
    # for the same path on systems that support symlinks
    def isfile(self, path):
        """Test whether a path is a regular file"""
        st = self.callstat(path)
        if not st:
            return False
        return statmod.S_ISREG(st.st_mode)

    # Is a path a directory?
    # This follows symbolic links, so both islink() and isdir()
    # can be true for the same path on systems that support symlinks
    def isdir(self, s):
        """Return true if the pathname refers to an existing directory."""
        st = self.callstat(s)
        if not st:
            return False
        return statmod.S_ISDIR(st.st_mode)

    def islink(self, path):
        """Test whether a path is a symbolic link"""
        st = self.calllstat(path)
        if not st:
            return False
        return statmod.S_ISLNK(st.st_mode)

    # Does a path exist?
    # This is false for dangling symbolic links on systems that support them.
    def exists(self, path):
        """Test whether a path exists. Returns False for broken symbolic links"""
        if self.callstat(path):
            return True
        return False

    def lexists(self, path):
        """Test whether a path exists. Returns True for broken symbolic links"""
        if self.calllstat(path):
            return True
        return False

    def stat(self, path):
        """Return the cached stat result for 'path', or False if it failed."""
        return self.callstat(path)

    def lstat(self, path):
        """Return the cached lstat result for 'path', or False if it failed."""
        return self.calllstat(path)

    def walk(self, top, topdown=True, onerror=None, followlinks=False):
        """Directory tree generator using the cached isdir()/islink().

        Matches os.walk, not os.path.walk(): yields (top, dirs, nondirs)
        tuples. When listing 'top' fails, 'onerror' (if given) is called
        with the exception and nothing is yielded for that directory.
        """
        # We may not have read permission for top, in which case we can't
        # get a list of the files the directory contains. os.path.walk
        # always suppressed the exception then, rather than blow up for a
        # minor reason when (say) a thousand readable directories are still
        # left to visit. That logic is copied here.
        try:
            names = os.listdir(top)
        except os.error as err:
            if onerror is not None:
                onerror(err)
            return

        dirs, nondirs = [], []
        for name in names:
            if self.isdir(os.path.join(top, name)):
                dirs.append(name)
            else:
                nondirs.append(name)

        if topdown:
            yield top, dirs, nondirs
        for name in dirs:
            new_path = os.path.join(top, name)
            # Symlinked directories are only descended into on request
            if followlinks or not self.islink(new_path):
                for x in self.walk(new_path, topdown, onerror, followlinks):
                    yield x
        if not topdown:
            yield top, dirs, nondirs

    ## realpath() related functions
    def __is_path_below(self, file, root):
        """Return True if 'file' (with a separator appended) starts with 'root'."""
        return (file + os.path.sep).startswith(root)

    def __realpath_rel(self, start, rel_path, root, loop_cnt, assume_dir):
        """Calculates real path of symlink 'start' + 'rel_path' below
        'root'; no part of 'start' below 'root' must contain symlinks. """
        have_dir = True

        for d in rel_path.split(os.path.sep):
            # The previous component was not a directory, so we cannot descend
            if not have_dir and not assume_dir:
                raise OSError(errno.ENOENT, "no such directory %s" % start)

            if d == os.path.pardir: # '..'
                if len(start) >= len(root):
                    # do not follow '..' before root
                    start = os.path.dirname(start)
                else:
                    # emit warning?
                    pass
            else:
                (start, have_dir) = self.__realpath(os.path.join(start, d),
                                                    root, loop_cnt, assume_dir)

            assert self.__is_path_below(start, root)

        return start

    def __realpath(self, file, root, loop_cnt, assume_dir):
        """Resolve 'file' while it is a symlink; return (path, is_directory).

        Raises OSError(ELOOP) once 'loop_cnt' link resolutions are used up.
        """
        while self.islink(file) and len(file) >= len(root):
            if loop_cnt == 0:
                raise OSError(errno.ELOOP, file)

            loop_cnt -= 1
            target = os.path.normpath(os.readlink(file))

            if not os.path.isabs(target):
                tdir = os.path.dirname(file)
                assert self.__is_path_below(tdir, root)
            else:
                # Absolute link targets are interpreted relative to 'root'
                tdir = root

            file = self.__realpath_rel(tdir, target, root, loop_cnt, assume_dir)

        # Best effort: any failure to query the path counts as "not a directory"
        try:
            is_dir = self.isdir(file)
        except Exception:
            is_dir = False

        return (file, is_dir)

    def realpath(self, file, root, use_physdir = True, loop_cnt = 100, assume_dir = False):
        """ Returns the canonical path of 'file', assuming a
        toplevel 'root' directory. When 'use_physdir' is set, all
        preceding path components of 'file' will be resolved first;
        this flag should be set unless it is guaranteed that there is
        no symlink in the path. When 'assume_dir' is not set, missing
        path components will raise an ENOENT error.

        Raises OSError(EINVAL) when 'file' is not below 'root' and
        OSError(ELOOP) when more than 'loop_cnt' symlinks are followed."""

        root = os.path.normpath(root)
        file = os.path.normpath(file)

        if not root.endswith(os.path.sep):
            # letting root end with '/' makes some things easier
            root = root + os.path.sep

        if not self.__is_path_below(file, root):
            raise OSError(errno.EINVAL, "file '%s' is not below root" % file)

        try:
            if use_physdir:
                file = self.__realpath_rel(root, file[(len(root) - 1):], root, loop_cnt, assume_dir)
            else:
                file = self.__realpath(file, root, loop_cnt, assume_dir)[0]
        except OSError as e:
            if e.errno == errno.ELOOP:
                # make ELOOP more readable; without catching it, there will
                # be printed a backtrace with 100s of OSError exceptions
                # else
                raise OSError(errno.ELOOP,
                              "too much recursions while resolving '%s'; loop in '%s'" %
                              (file, e.strerror))

            raise

        return file