codeparser: Call intern over the set contents for better cache performance

See the comment in the code in the commit for more information. (Bitbake rev: 2d56dc7b1f0d186e14c4c8a949b280b6b3fc31de) Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
author: Richard Purdie <richard.purdie@linuxfoundation.org> 2012-03-11 14:30:31 +0000
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2012-03-12 15:52:33 +0000
commit: c27b7aab3c37f182bf9acc5d459185f32fb195d0 (patch)
tree: be1e1144c068a9d109eedbc8f113187c1d14438a /bitbake
parent: 7bf0a790b23833a1b426d2349885459112fb5d7c (diff)
download: poky-c27b7aab3c37f182bf9acc5d459185f32fb195d0.tar.gz
1 files changed, 21 insertions, 0 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
index 04a34f944a..af2e19411c 100644
--- a/bitbake/lib/bb/codeparser.py
+++ b/bitbake/lib/bb/codeparser.py
@@ -98,6 +98,12 @@ def parser_cache_save(d):
    bb.utils.unlockfile(lf)
    bb.utils.unlockfile(glf)
+def internSet(items):
+    new = set()
+    for i in items:
+        new.add(intern(i))
+    return new
 def parser_cache_savemerge(d):
    cachefile = parser_cachefile(d)
    if not cachefile:
@@ -133,6 +139,21 @@ def parser_cache_savemerge(d):
                data[1][h] = extradata[1][h]
        os.unlink(f)
+    # When the dicts are originally created, python calls intern() on the set keys
+    # which significantly improves memory usage. Sadly the pickle/unpickle process 
+    # doesn't call intern() on the keys and results in the same strings being duplicated
+    # in memory. This also means pickle will save the same string multiple times in 
+    # the cache file. By interning the data here, the cache file shrinks dramatically
+    # meaning faster load times and the reloaded cache files also consume much less 
+    # memory. This is worth any performance hit from these loops and the use of the 
+    # intern() data storage.
+    # Python 3.x may behave better in this area
+    for h in data[0]:
+        data[0][h]["refs"] = internSet(data[0][h]["refs"])
+        data[0][h]["execs"] = internSet(data[0][h]["execs"])
+    for h in data[1]:
+        data[1][h]["execs"] = internSet(data[1][h]["execs"])
    p = pickle.Pickler(file(cachefile, "wb"), -1)
    p.dump([data, PARSERCACHE_VERSION])
author	Richard Purdie <richard.purdie@linuxfoundation.org>	2012-03-11 14:30:31 +0000
committer	Richard Purdie <richard.purdie@linuxfoundation.org>	2012-03-12 15:52:33 +0000
commit	c27b7aab3c37f182bf9acc5d459185f32fb195d0 (patch)
tree	be1e1144c068a9d109eedbc8f113187c1d14438a /bitbake
parent	7bf0a790b23833a1b426d2349885459112fb5d7c (diff)
download	poky-c27b7aab3c37f182bf9acc5d459185f32fb195d0.tar.gz

diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
index 04a34f944a..af2e19411c 100644
--- a/bitbake/lib/bb/codeparser.py
+++ b/bitbake/lib/bb/codeparser.py
@@ -98,6 +98,12 @@ def parser_cache_save(d):
98	bb.utils.unlockfile(lf)	98	bb.utils.unlockfile(lf)
99	bb.utils.unlockfile(glf)	99	bb.utils.unlockfile(glf)
100		100
		101	def internSet(items):
		102	new = set()
		103	for i in items:
		104	new.add(intern(i))
		105	return new
		106
101	def parser_cache_savemerge(d):	107	def parser_cache_savemerge(d):
102	cachefile = parser_cachefile(d)	108	cachefile = parser_cachefile(d)
103	if not cachefile:	109	if not cachefile:
@@ -133,6 +139,21 @@ def parser_cache_savemerge(d):
133	data[1][h] = extradata[1][h]	139	data[1][h] = extradata[1][h]
134	os.unlink(f)	140	os.unlink(f)
135		141
		142	# When the dicts are originally created, python calls intern() on the set keys
		143	# which significantly improves memory usage. Sadly the pickle/unpickle process
		144	# doesn't call intern() on the keys and results in the same strings being duplicated
		145	# in memory. This also means pickle will save the same string multiple times in
		146	# the cache file. By interning the data here, the cache file shrinks dramatically
		147	# meaning faster load times and the reloaded cache files also consume much less
		148	# memory. This is worth any performance hit from these loops and the use of the
		149	# intern() data storage.
		150	# Python 3.x may behave better in this area
		151	for h in data[0]:
		152	data[0][h]["refs"] = internSet(data[0][h]["refs"])
		153	data[0][h]["execs"] = internSet(data[0][h]["execs"])
		154	for h in data[1]:
		155	data[1][h]["execs"] = internSet(data[1][h]["execs"])
		156
136	p = pickle.Pickler(file(cachefile, "wb"), -1)	157	p = pickle.Pickler(file(cachefile, "wb"), -1)
137	p.dump([data, PARSERCACHE_VERSION])	158	p.dump([data, PARSERCACHE_VERSION])
138		159