summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  bitbake/lib/bb/cache.py      | 12
-rw-r--r--  bitbake/lib/bb/codeparser.py | 143
2 files changed, 109 insertions, 46 deletions
diff --git a/bitbake/lib/bb/cache.py b/bitbake/lib/bb/cache.py
index c7f3b7ab71..f892d7dc32 100644
--- a/bitbake/lib/bb/cache.py
+++ b/bitbake/lib/bb/cache.py
@@ -764,16 +764,6 @@ class MultiProcessCache(object):
764 764
765 self.cachedata = data 765 self.cachedata = data
766 766
767 def internSet(self, items):
768 new = set()
769 for i in items:
770 new.add(intern(i))
771 return new
772
773 def compress_keys(self, data):
774 # Override in subclasses if desired
775 return
776
777 def create_cachedata(self): 767 def create_cachedata(self):
778 data = [{}] 768 data = [{}]
779 return data 769 return data
@@ -833,8 +823,6 @@ class MultiProcessCache(object):
833 self.merge_data(extradata, data) 823 self.merge_data(extradata, data)
834 os.unlink(f) 824 os.unlink(f)
835 825
836 self.compress_keys(data)
837
838 with open(self.cachefile, "wb") as f: 826 with open(self.cachefile, "wb") as f:
839 p = pickle.Pickler(f, -1) 827 p = pickle.Pickler(f, -1)
840 p.dump([data, self.__class__.CACHE_VERSION]) 828 p.dump([data, self.__class__.CACHE_VERSION])
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
index 2e8de12f33..8b8f91a762 100644
--- a/bitbake/lib/bb/codeparser.py
+++ b/bitbake/lib/bb/codeparser.py
@@ -33,9 +33,82 @@ def check_indent(codestr):
33 return codestr 33 return codestr
34 34
35 35
36# Basically pickle, in python 2.7.3 at least, does badly with data duplication
37# upon pickling and unpickling. Combine this with duplicate objects and things
38# are a mess.
39#
40# When the sets are originally created, python calls intern() on the set keys
41# which significantly improves memory usage. Sadly the pickle/unpickle process
42# doesn't call intern() on the keys and results in the same strings being duplicated
43# in memory. This also means pickle will save the same string multiple times in
44# the cache file.
45#
46# By having shell and python cacheline objects with setstate/getstate, we force
47# the object creation through our own routine where we can call intern (via internSet).
48#
49# We also use hashable frozensets and ensure we use references to these so that
50# duplicates can be removed, both in memory and in the resulting pickled data.
51#
52# By playing these games, the size of the cache file shrinks dramatically
53# meaning faster load times and the reloaded cache files also consume much less
54# memory. Smaller cache files, faster load times and lower memory usage is good.
55#
56# A custom getstate/setstate using tuples is actually worth 15% cachesize by
57# avoiding duplication of the attribute names!
58
59class SetCache(object):
60 def __init__(self):
61 self.setcache = {}
62
63 def internSet(self, items):
64
65 new = []
66 for i in items:
67 new.append(intern(i))
68 s = frozenset(new)
69 if hash(s) in self.setcache:
70 return self.setcache[hash(s)]
71 self.setcache[hash(s)] = s
72 return s
73
74codecache = SetCache()
75
76class pythonCacheLine(object):
77 def __init__(self, refs, execs, contains):
78 self.refs = codecache.internSet(refs)
79 self.execs = codecache.internSet(execs)
80 self.contains = {}
81 for c in contains:
82 self.contains[c] = codecache.internSet(contains[c])
83
84 def __getstate__(self):
85 return (self.refs, self.execs, self.contains)
86
87 def __setstate__(self, state):
88 (refs, execs, contains) = state
89 self.__init__(refs, execs, contains)
90 def __hash__(self):
91 l = (hash(self.refs), hash(self.execs))
92 for c in sorted(self.contains.keys()):
93 l = l + (c, hash(self.contains[c]))
94 return hash(l)
95
96class shellCacheLine(object):
97 def __init__(self, execs):
98 self.execs = codecache.internSet(execs)
99
100 def __getstate__(self):
101 return (self.execs)
102
103 def __setstate__(self, state):
104 (execs) = state
105 self.__init__(execs)
106 def __hash__(self):
107 return hash(self.execs)
108
36class CodeParserCache(MultiProcessCache): 109class CodeParserCache(MultiProcessCache):
37 cache_file_name = "bb_codeparser.dat" 110 cache_file_name = "bb_codeparser.dat"
38 CACHE_VERSION = 6 111 CACHE_VERSION = 7
39 112
40 def __init__(self): 113 def __init__(self):
41 MultiProcessCache.__init__(self) 114 MultiProcessCache.__init__(self)
@@ -44,6 +117,27 @@ class CodeParserCache(MultiProcessCache):
44 self.pythoncacheextras = self.cachedata_extras[0] 117 self.pythoncacheextras = self.cachedata_extras[0]
45 self.shellcacheextras = self.cachedata_extras[1] 118 self.shellcacheextras = self.cachedata_extras[1]
46 119
120 # To avoid duplication in the codeparser cache, keep
121 # a lookup of hashes of objects we already have
122 self.pythoncachelines = {}
123 self.shellcachelines = {}
124
125 def newPythonCacheLine(self, refs, execs, contains):
126 cacheline = pythonCacheLine(refs, execs, contains)
127 h = hash(cacheline)
128 if h in self.pythoncachelines:
129 return self.pythoncachelines[h]
130 self.pythoncachelines[h] = cacheline
131 return cacheline
132
133 def newShellCacheLine(self, execs):
134 cacheline = shellCacheLine(execs)
135 h = hash(cacheline)
136 if h in self.shellcachelines:
137 return self.shellcachelines[h]
138 self.shellcachelines[h] = cacheline
139 return cacheline
140
47 def init_cache(self, d): 141 def init_cache(self, d):
48 MultiProcessCache.init_cache(self, d) 142 MultiProcessCache.init_cache(self, d)
49 143
@@ -51,25 +145,6 @@ class CodeParserCache(MultiProcessCache):
51 self.pythoncache = self.cachedata[0] 145 self.pythoncache = self.cachedata[0]
52 self.shellcache = self.cachedata[1] 146 self.shellcache = self.cachedata[1]
53 147
54 def compress_keys(self, data):
55 # When the dicts are originally created, python calls intern() on the set keys
56 # which significantly improves memory usage. Sadly the pickle/unpickle process
57 # doesn't call intern() on the keys and results in the same strings being duplicated
58 # in memory. This also means pickle will save the same string multiple times in
59 # the cache file. By interning the data here, the cache file shrinks dramatically
60 # meaning faster load times and the reloaded cache files also consume much less
61 # memory. This is worth any performance hit from this loops and the use of the
62 # intern() data storage.
63 # Python 3.x may behave better in this area
64 for h in data[0]:
65 data[0][h]["refs"] = self.internSet(data[0][h]["refs"])
66 data[0][h]["execs"] = self.internSet(data[0][h]["execs"])
67 for k in data[0][h]["contains"]:
68 data[0][h]["contains"][k] = self.internSet(data[0][h]["contains"][k])
69 for h in data[1]:
70 data[1][h]["execs"] = self.internSet(data[1][h]["execs"])
71 return
72
73 def create_cachedata(self): 148 def create_cachedata(self):
74 data = [{}, {}] 149 data = [{}, {}]
75 return data 150 return data
@@ -168,15 +243,19 @@ class PythonParser():
168 h = hash(str(node)) 243 h = hash(str(node))
169 244
170 if h in codeparsercache.pythoncache: 245 if h in codeparsercache.pythoncache:
171 self.references = codeparsercache.pythoncache[h]["refs"] 246 self.references = set(codeparsercache.pythoncache[h].refs)
172 self.execs = codeparsercache.pythoncache[h]["execs"] 247 self.execs = set(codeparsercache.pythoncache[h].execs)
173 self.contains = codeparsercache.pythoncache[h]["contains"] 248 self.contains = {}
249 for i in codeparsercache.pythoncache[h].contains:
250 self.contains[i] = set(codeparsercache.pythoncache[h].contains[i])
174 return 251 return
175 252
176 if h in codeparsercache.pythoncacheextras: 253 if h in codeparsercache.pythoncacheextras:
177 self.references = codeparsercache.pythoncacheextras[h]["refs"] 254 self.references = set(codeparsercache.pythoncacheextras[h].refs)
178 self.execs = codeparsercache.pythoncacheextras[h]["execs"] 255 self.execs = set(codeparsercache.pythoncacheextras[h].execs)
179 self.contains = codeparsercache.pythoncacheextras[h]["contains"] 256 self.contains = {}
257 for i in codeparsercache.pythoncacheextras[h].contains:
258 self.contains[i] = set(codeparsercache.pythoncacheextras[h].contains[i])
180 return 259 return
181 260
182 code = compile(check_indent(str(node)), "<string>", "exec", 261 code = compile(check_indent(str(node)), "<string>", "exec",
@@ -188,10 +267,7 @@ class PythonParser():
188 267
189 self.execs.update(self.var_execs) 268 self.execs.update(self.var_execs)
190 269
191 codeparsercache.pythoncacheextras[h] = {} 270 codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
192 codeparsercache.pythoncacheextras[h]["refs"] = self.references
193 codeparsercache.pythoncacheextras[h]["execs"] = self.execs
194 codeparsercache.pythoncacheextras[h]["contains"] = self.contains
195 271
196class ShellParser(): 272class ShellParser():
197 def __init__(self, name, log): 273 def __init__(self, name, log):
@@ -210,18 +286,17 @@ class ShellParser():
210 h = hash(str(value)) 286 h = hash(str(value))
211 287
212 if h in codeparsercache.shellcache: 288 if h in codeparsercache.shellcache:
213 self.execs = codeparsercache.shellcache[h]["execs"] 289 self.execs = set(codeparsercache.shellcache[h].execs)
214 return self.execs 290 return self.execs
215 291
216 if h in codeparsercache.shellcacheextras: 292 if h in codeparsercache.shellcacheextras:
217 self.execs = codeparsercache.shellcacheextras[h]["execs"] 293 self.execs = set(codeparsercache.shellcacheextras[h].execs)
218 return self.execs 294 return self.execs
219 295
220 self._parse_shell(value) 296 self._parse_shell(value)
221 self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs) 297 self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)
222 298
223 codeparsercache.shellcacheextras[h] = {} 299 codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)
224 codeparsercache.shellcacheextras[h]["execs"] = self.execs
225 300
226 return self.execs 301 return self.execs
227 302