diff options
-rw-r--r--  bitbake/lib/bb/cache.py      |  12
-rw-r--r--  bitbake/lib/bb/codeparser.py | 143
2 files changed, 109 insertions, 46 deletions
diff --git a/bitbake/lib/bb/cache.py b/bitbake/lib/bb/cache.py index c7f3b7ab71..f892d7dc32 100644 --- a/bitbake/lib/bb/cache.py +++ b/bitbake/lib/bb/cache.py | |||
@@ -764,16 +764,6 @@ class MultiProcessCache(object): | |||
764 | 764 | ||
765 | self.cachedata = data | 765 | self.cachedata = data |
766 | 766 | ||
767 | def internSet(self, items): | ||
768 | new = set() | ||
769 | for i in items: | ||
770 | new.add(intern(i)) | ||
771 | return new | ||
772 | |||
773 | def compress_keys(self, data): | ||
774 | # Override in subclasses if desired | ||
775 | return | ||
776 | |||
777 | def create_cachedata(self): | 767 | def create_cachedata(self): |
778 | data = [{}] | 768 | data = [{}] |
779 | return data | 769 | return data |
@@ -833,8 +823,6 @@ class MultiProcessCache(object): | |||
833 | self.merge_data(extradata, data) | 823 | self.merge_data(extradata, data) |
834 | os.unlink(f) | 824 | os.unlink(f) |
835 | 825 | ||
836 | self.compress_keys(data) | ||
837 | |||
838 | with open(self.cachefile, "wb") as f: | 826 | with open(self.cachefile, "wb") as f: |
839 | p = pickle.Pickler(f, -1) | 827 | p = pickle.Pickler(f, -1) |
840 | p.dump([data, self.__class__.CACHE_VERSION]) | 828 | p.dump([data, self.__class__.CACHE_VERSION]) |
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py index 2e8de12f33..8b8f91a762 100644 --- a/bitbake/lib/bb/codeparser.py +++ b/bitbake/lib/bb/codeparser.py | |||
@@ -33,9 +33,82 @@ def check_indent(codestr): | |||
33 | return codestr | 33 | return codestr |
34 | 34 | ||
35 | 35 | ||
36 | # Basically pickle, in python 2.7.3 at least, does badly with data duplication | ||
37 | # upon pickling and unpickling. Combine this with duplicate objects and things | ||
38 | # are a mess. | ||
39 | # | ||
40 | # When the sets are originally created, python calls intern() on the set keys | ||
41 | # which significantly improves memory usage. Sadly the pickle/unpickle process | ||
42 | # doesn't call intern() on the keys and results in the same strings being duplicated | ||
43 | # in memory. This also means pickle will save the same string multiple times in | ||
44 | # the cache file. | ||
45 | # | ||
46 | # By having shell and python cacheline objects with setstate/getstate, we force | ||
47 | # the object creation through our own routine where we can call intern (via internSet). | ||
48 | # | ||
49 | # We also use hashable frozensets and ensure we use references to these so that | ||
50 | # duplicates can be removed, both in memory and in the resulting pickled data. | ||
51 | # | ||
52 | # By playing these games, the size of the cache file shrinks dramatically | ||
53 | # meaning faster load times and the reloaded cache files also consume much less | ||
54 | # memory. Smaller cache files, faster load times and lower memory usage is good. | ||
55 | # | ||
56 | # A custom getstate/setstate using tuples is actually worth 15% cachesize by | ||
57 | # avoiding duplication of the attribute names! | ||
58 | |||
59 | class SetCache(object): | ||
60 | def __init__(self): | ||
61 | self.setcache = {} | ||
62 | |||
63 | def internSet(self, items): | ||
64 | |||
65 | new = [] | ||
66 | for i in items: | ||
67 | new.append(intern(i)) | ||
68 | s = frozenset(new) | ||
69 | if hash(s) in self.setcache: | ||
70 | return self.setcache[hash(s)] | ||
71 | self.setcache[hash(s)] = s | ||
72 | return s | ||
73 | |||
74 | codecache = SetCache() | ||
75 | |||
76 | class pythonCacheLine(object): | ||
77 | def __init__(self, refs, execs, contains): | ||
78 | self.refs = codecache.internSet(refs) | ||
79 | self.execs = codecache.internSet(execs) | ||
80 | self.contains = {} | ||
81 | for c in contains: | ||
82 | self.contains[c] = codecache.internSet(contains[c]) | ||
83 | |||
84 | def __getstate__(self): | ||
85 | return (self.refs, self.execs, self.contains) | ||
86 | |||
87 | def __setstate__(self, state): | ||
88 | (refs, execs, contains) = state | ||
89 | self.__init__(refs, execs, contains) | ||
90 | def __hash__(self): | ||
91 | l = (hash(self.refs), hash(self.execs)) | ||
92 | for c in sorted(self.contains.keys()): | ||
93 | l = l + (c, hash(self.contains[c])) | ||
94 | return hash(l) | ||
95 | |||
96 | class shellCacheLine(object): | ||
97 | def __init__(self, execs): | ||
98 | self.execs = codecache.internSet(execs) | ||
99 | |||
100 | def __getstate__(self): | ||
101 | return (self.execs) | ||
102 | |||
103 | def __setstate__(self, state): | ||
104 | (execs) = state | ||
105 | self.__init__(execs) | ||
106 | def __hash__(self): | ||
107 | return hash(self.execs) | ||
108 | |||
36 | class CodeParserCache(MultiProcessCache): | 109 | class CodeParserCache(MultiProcessCache): |
37 | cache_file_name = "bb_codeparser.dat" | 110 | cache_file_name = "bb_codeparser.dat" |
38 | CACHE_VERSION = 6 | 111 | CACHE_VERSION = 7 |
39 | 112 | ||
40 | def __init__(self): | 113 | def __init__(self): |
41 | MultiProcessCache.__init__(self) | 114 | MultiProcessCache.__init__(self) |
@@ -44,6 +117,27 @@ class CodeParserCache(MultiProcessCache): | |||
44 | self.pythoncacheextras = self.cachedata_extras[0] | 117 | self.pythoncacheextras = self.cachedata_extras[0] |
45 | self.shellcacheextras = self.cachedata_extras[1] | 118 | self.shellcacheextras = self.cachedata_extras[1] |
46 | 119 | ||
120 | # To avoid duplication in the codeparser cache, keep | ||
121 | # a lookup of hashes of objects we already have | ||
122 | self.pythoncachelines = {} | ||
123 | self.shellcachelines = {} | ||
124 | |||
125 | def newPythonCacheLine(self, refs, execs, contains): | ||
126 | cacheline = pythonCacheLine(refs, execs, contains) | ||
127 | h = hash(cacheline) | ||
128 | if h in self.pythoncachelines: | ||
129 | return self.pythoncachelines[h] | ||
130 | self.pythoncachelines[h] = cacheline | ||
131 | return cacheline | ||
132 | |||
133 | def newShellCacheLine(self, execs): | ||
134 | cacheline = shellCacheLine(execs) | ||
135 | h = hash(cacheline) | ||
136 | if h in self.shellcachelines: | ||
137 | return self.shellcachelines[h] | ||
138 | self.shellcachelines[h] = cacheline | ||
139 | return cacheline | ||
140 | |||
47 | def init_cache(self, d): | 141 | def init_cache(self, d): |
48 | MultiProcessCache.init_cache(self, d) | 142 | MultiProcessCache.init_cache(self, d) |
49 | 143 | ||
@@ -51,25 +145,6 @@ class CodeParserCache(MultiProcessCache): | |||
51 | self.pythoncache = self.cachedata[0] | 145 | self.pythoncache = self.cachedata[0] |
52 | self.shellcache = self.cachedata[1] | 146 | self.shellcache = self.cachedata[1] |
53 | 147 | ||
54 | def compress_keys(self, data): | ||
55 | # When the dicts are originally created, python calls intern() on the set keys | ||
56 | # which significantly improves memory usage. Sadly the pickle/unpickle process | ||
57 | # doesn't call intern() on the keys and results in the same strings being duplicated | ||
58 | # in memory. This also means pickle will save the same string multiple times in | ||
59 | # the cache file. By interning the data here, the cache file shrinks dramatically | ||
60 | # meaning faster load times and the reloaded cache files also consume much less | ||
61 | # memory. This is worth any performance hit from this loops and the use of the | ||
62 | # intern() data storage. | ||
63 | # Python 3.x may behave better in this area | ||
64 | for h in data[0]: | ||
65 | data[0][h]["refs"] = self.internSet(data[0][h]["refs"]) | ||
66 | data[0][h]["execs"] = self.internSet(data[0][h]["execs"]) | ||
67 | for k in data[0][h]["contains"]: | ||
68 | data[0][h]["contains"][k] = self.internSet(data[0][h]["contains"][k]) | ||
69 | for h in data[1]: | ||
70 | data[1][h]["execs"] = self.internSet(data[1][h]["execs"]) | ||
71 | return | ||
72 | |||
73 | def create_cachedata(self): | 148 | def create_cachedata(self): |
74 | data = [{}, {}] | 149 | data = [{}, {}] |
75 | return data | 150 | return data |
@@ -168,15 +243,19 @@ class PythonParser(): | |||
168 | h = hash(str(node)) | 243 | h = hash(str(node)) |
169 | 244 | ||
170 | if h in codeparsercache.pythoncache: | 245 | if h in codeparsercache.pythoncache: |
171 | self.references = codeparsercache.pythoncache[h]["refs"] | 246 | self.references = set(codeparsercache.pythoncache[h].refs) |
172 | self.execs = codeparsercache.pythoncache[h]["execs"] | 247 | self.execs = set(codeparsercache.pythoncache[h].execs) |
173 | self.contains = codeparsercache.pythoncache[h]["contains"] | 248 | self.contains = {} |
249 | for i in codeparsercache.pythoncache[h].contains: | ||
250 | self.contains[i] = set(codeparsercache.pythoncache[h].contains[i]) | ||
174 | return | 251 | return |
175 | 252 | ||
176 | if h in codeparsercache.pythoncacheextras: | 253 | if h in codeparsercache.pythoncacheextras: |
177 | self.references = codeparsercache.pythoncacheextras[h]["refs"] | 254 | self.references = set(codeparsercache.pythoncacheextras[h].refs) |
178 | self.execs = codeparsercache.pythoncacheextras[h]["execs"] | 255 | self.execs = set(codeparsercache.pythoncacheextras[h].execs) |
179 | self.contains = codeparsercache.pythoncacheextras[h]["contains"] | 256 | self.contains = {} |
257 | for i in codeparsercache.pythoncacheextras[h].contains: | ||
258 | self.contains[i] = set(codeparsercache.pythoncacheextras[h].contains[i]) | ||
180 | return | 259 | return |
181 | 260 | ||
182 | code = compile(check_indent(str(node)), "<string>", "exec", | 261 | code = compile(check_indent(str(node)), "<string>", "exec", |
@@ -188,10 +267,7 @@ class PythonParser(): | |||
188 | 267 | ||
189 | self.execs.update(self.var_execs) | 268 | self.execs.update(self.var_execs) |
190 | 269 | ||
191 | codeparsercache.pythoncacheextras[h] = {} | 270 | codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains) |
192 | codeparsercache.pythoncacheextras[h]["refs"] = self.references | ||
193 | codeparsercache.pythoncacheextras[h]["execs"] = self.execs | ||
194 | codeparsercache.pythoncacheextras[h]["contains"] = self.contains | ||
195 | 271 | ||
196 | class ShellParser(): | 272 | class ShellParser(): |
197 | def __init__(self, name, log): | 273 | def __init__(self, name, log): |
@@ -210,18 +286,17 @@ class ShellParser(): | |||
210 | h = hash(str(value)) | 286 | h = hash(str(value)) |
211 | 287 | ||
212 | if h in codeparsercache.shellcache: | 288 | if h in codeparsercache.shellcache: |
213 | self.execs = codeparsercache.shellcache[h]["execs"] | 289 | self.execs = set(codeparsercache.shellcache[h].execs) |
214 | return self.execs | 290 | return self.execs |
215 | 291 | ||
216 | if h in codeparsercache.shellcacheextras: | 292 | if h in codeparsercache.shellcacheextras: |
217 | self.execs = codeparsercache.shellcacheextras[h]["execs"] | 293 | self.execs = set(codeparsercache.shellcacheextras[h].execs) |
218 | return self.execs | 294 | return self.execs |
219 | 295 | ||
220 | self._parse_shell(value) | 296 | self._parse_shell(value) |
221 | self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs) | 297 | self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs) |
222 | 298 | ||
223 | codeparsercache.shellcacheextras[h] = {} | 299 | codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs) |
224 | codeparsercache.shellcacheextras[h]["execs"] = self.execs | ||
225 | 300 | ||
226 | return self.execs | 301 | return self.execs |
227 | 302 | ||