summaryrefslogtreecommitdiffstats
path: root/bitbake/lib
diff options
context:
space:
mode:
authorRichard Purdie <richard.purdie@linuxfoundation.org>2012-03-11 14:30:31 +0000
committerRichard Purdie <richard.purdie@linuxfoundation.org>2012-03-12 15:52:33 +0000
commitc27b7aab3c37f182bf9acc5d459185f32fb195d0 (patch)
treebe1e1144c068a9d109eedbc8f113187c1d14438a /bitbake/lib
parent7bf0a790b23833a1b426d2349885459112fb5d7c (diff)
downloadpoky-c27b7aab3c37f182bf9acc5d459185f32fb195d0.tar.gz
codeparser: Call intern over the set contents for better cache performance
See the comment in the code in the commit for more information. (Bitbake rev: 2d56dc7b1f0d186e14c4c8a949b280b6b3fc31de) Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib')
-rw-r--r--bitbake/lib/bb/codeparser.py21
1 files changed, 21 insertions, 0 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
index 04a34f944a..af2e19411c 100644
--- a/bitbake/lib/bb/codeparser.py
+++ b/bitbake/lib/bb/codeparser.py
@@ -98,6 +98,12 @@ def parser_cache_save(d):
98 bb.utils.unlockfile(lf) 98 bb.utils.unlockfile(lf)
99 bb.utils.unlockfile(glf) 99 bb.utils.unlockfile(glf)
100 100
# intern() is a builtin on Python 2 but lives in the sys module on Python 3.
# Resolve it once here so internSet() works unchanged on either interpreter.
try:
    _intern = intern
except NameError:
    from sys import intern as _intern


def internSet(items):
    """Return a new set holding the interned form of every string in items.

    Equal strings that have been interned share a single object, so sets
    built this way reference one copy of each distinct string instead of
    separate duplicates.

    items -- an iterable of strings; it is not modified.
    """
    # set(generator) rather than a set comprehension keeps Python 2.6 working.
    return set(_intern(i) for i in items)
106
101def parser_cache_savemerge(d): 107def parser_cache_savemerge(d):
102 cachefile = parser_cachefile(d) 108 cachefile = parser_cachefile(d)
103 if not cachefile: 109 if not cachefile:
@@ -133,6 +139,21 @@ def parser_cache_savemerge(d):
133 data[1][h] = extradata[1][h] 139 data[1][h] = extradata[1][h]
134 os.unlink(f) 140 os.unlink(f)
135 141
142 # When the dicts are originally created, python calls intern() on the set keys
143 # which significantly improves memory usage. Sadly the pickle/unpickle process
144 # doesn't call intern() on the keys and results in the same strings being duplicated
145 # in memory. This also means pickle will save the same string multiple times in
146 # the cache file. By interning the data here, the cache file shrinks dramatically
147 # meaning faster load times and the reloaded cache files also consume much less
148 # memory. This is worth any performance hit from these loops and the use of the
149 # intern() data storage.
150 # Python 3.x may behave better in this area
151 for h in data[0]:
152 data[0][h]["refs"] = internSet(data[0][h]["refs"])
153 data[0][h]["execs"] = internSet(data[0][h]["execs"])
154 for h in data[1]:
155 data[1][h]["execs"] = internSet(data[1][h]["execs"])
156
136 p = pickle.Pickler(file(cachefile, "wb"), -1) 157 p = pickle.Pickler(file(cachefile, "wb"), -1)
137 p.dump([data, PARSERCACHE_VERSION]) 158 p.dump([data, PARSERCACHE_VERSION])
138 159