diff options
author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2012-03-11 14:30:31 +0000 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2012-03-12 15:52:33 +0000 |
commit | c27b7aab3c37f182bf9acc5d459185f32fb195d0 (patch) | |
tree | be1e1144c068a9d109eedbc8f113187c1d14438a /bitbake/lib | |
parent | 7bf0a790b23833a1b426d2349885459112fb5d7c (diff) | |
download | poky-c27b7aab3c37f182bf9acc5d459185f32fb195d0.tar.gz |
codeparser: Call intern over the set contents for better cache performance
See the comment in the code in the commit for more information.
(Bitbake rev: 2d56dc7b1f0d186e14c4c8a949b280b6b3fc31de)
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib')
-rw-r--r-- | bitbake/lib/bb/codeparser.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py index 04a34f944a..af2e19411c 100644 --- a/bitbake/lib/bb/codeparser.py +++ b/bitbake/lib/bb/codeparser.py | |||
@@ -98,6 +98,12 @@ def parser_cache_save(d): | |||
98 | bb.utils.unlockfile(lf) | 98 | bb.utils.unlockfile(lf) |
99 | bb.utils.unlockfile(glf) | 99 | bb.utils.unlockfile(glf) |
100 | 100 | ||
101 | def internSet(items): | ||
102 | new = set() | ||
103 | for i in items: | ||
104 | new.add(intern(i)) | ||
105 | return new | ||
106 | |||
101 | def parser_cache_savemerge(d): | 107 | def parser_cache_savemerge(d): |
102 | cachefile = parser_cachefile(d) | 108 | cachefile = parser_cachefile(d) |
103 | if not cachefile: | 109 | if not cachefile: |
@@ -133,6 +139,21 @@ def parser_cache_savemerge(d): | |||
133 | data[1][h] = extradata[1][h] | 139 | data[1][h] = extradata[1][h] |
134 | os.unlink(f) | 140 | os.unlink(f) |
135 | 141 | ||
142 | # When the dicts are originally created, python calls intern() on the set keys | ||
143 | # which significantly improves memory usage. Sadly the pickle/unpickle process | ||
144 | # doesn't call intern() on the keys and results in the same strings being duplicated | ||
145 | # in memory. This also means pickle will save the same string multiple times in | ||
146 | # the cache file. By interning the data here, the cache file shrinks dramatically | ||
147 | # meaning faster load times and the reloaded cache files also consume much less | ||
148 | # memory. This is worth any performance hit from these loops and the use of the | |
149 | # intern() data storage. | ||
150 | # Python 3.x may behave better in this area | ||
151 | for h in data[0]: | ||
152 | data[0][h]["refs"] = internSet(data[0][h]["refs"]) | ||
153 | data[0][h]["execs"] = internSet(data[0][h]["execs"]) | ||
154 | for h in data[1]: | ||
155 | data[1][h]["execs"] = internSet(data[1][h]["execs"]) | ||
156 | |||
136 | p = pickle.Pickler(file(cachefile, "wb"), -1) | 157 | p = pickle.Pickler(file(cachefile, "wb"), -1) |
137 | p.dump([data, PARSERCACHE_VERSION]) | 158 | p.dump([data, PARSERCACHE_VERSION]) |
138 | 159 | ||