diff options
author | Paul Eggleton <paul.eggleton@linux.intel.com> | 2012-05-23 00:23:31 +0100 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2012-05-23 11:33:18 +0100 |
commit | d7b818b51f3e6dded0c0885cdfed5a24cda3b428 (patch) | |
tree | 6cd18f70cb71682aad55a6079da32c25f60bcbf7 | |
parent | 644b30adfb8fb158a253712033f717aadf6f2c68 (diff) | |
download | poky-d7b818b51f3e6dded0c0885cdfed5a24cda3b428.tar.gz |
bitbake: refactor out codeparser cache into a separate class
We want to be able to reuse most of this functionality for the file
checksum cache.
(Bitbake rev: 0fe3cb1438d297f90dd0fc6b26362ecbff75c76d)
Signed-off-by: Paul Eggleton <paul.eggleton@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- | bitbake/lib/bb/cache.py | 116 | ||||
-rw-r--r-- | bitbake/lib/bb/codeparser.py | 191 |
2 files changed, 171 insertions, 136 deletions
diff --git a/bitbake/lib/bb/cache.py b/bitbake/lib/bb/cache.py index 47e814b577..36e6356f51 100644 --- a/bitbake/lib/bb/cache.py +++ b/bitbake/lib/bb/cache.py | |||
@@ -1,11 +1,12 @@ | |||
1 | # ex:ts=4:sw=4:sts=4:et | 1 | # ex:ts=4:sw=4:sts=4:et |
2 | # -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- | 2 | # -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- |
3 | # | 3 | # |
4 | # BitBake 'Event' implementation | 4 | # BitBake Cache implementation |
5 | # | 5 | # |
6 | # Caching of bitbake variables before task execution | 6 | # Caching of bitbake variables before task execution |
7 | 7 | ||
8 | # Copyright (C) 2006 Richard Purdie | 8 | # Copyright (C) 2006 Richard Purdie |
9 | # Copyright (C) 2012 Intel Corporation | ||
9 | 10 | ||
10 | # but small sections based on code from bin/bitbake: | 11 | # but small sections based on code from bin/bitbake: |
11 | # Copyright (C) 2003, 2004 Chris Larson | 12 | # Copyright (C) 2003, 2004 Chris Larson |
@@ -703,4 +704,115 @@ class CacheData(object): | |||
703 | for info in info_array: | 704 | for info in info_array: |
704 | info.add_cacheData(self, fn) | 705 | info.add_cacheData(self, fn) |
705 | 706 | ||
706 | 707 | ||
708 | class MultiProcessCache(object): | ||
709 | """ | ||
710 | BitBake multi-process cache implementation | ||
711 | |||
712 | Used by the codeparser & file checksum caches | ||
713 | """ | ||
714 | |||
715 | def __init__(self): | ||
716 | self.cachefile = None | ||
717 | self.cachedata = self.create_cachedata() | ||
718 | self.cachedata_extras = self.create_cachedata() | ||
719 | |||
720 | def init_cache(self, d): | ||
721 | cachedir = (d.getVar("PERSISTENT_DIR", True) or | ||
722 | d.getVar("CACHE", True)) | ||
723 | if cachedir in [None, '']: | ||
724 | return | ||
725 | bb.utils.mkdirhier(cachedir) | ||
726 | self.cachefile = os.path.join(cachedir, self.__class__.cache_file_name) | ||
727 | logger.debug(1, "Using cache in '%s'", self.cachefile) | ||
728 | |||
729 | try: | ||
730 | p = pickle.Unpickler(file(self.cachefile, "rb")) | ||
731 | data, version = p.load() | ||
732 | except: | ||
733 | return | ||
734 | |||
735 | if version != self.__class__.CACHE_VERSION: | ||
736 | return | ||
737 | |||
738 | self.cachedata = data | ||
739 | |||
740 | def internSet(self, items): | ||
741 | new = set() | ||
742 | for i in items: | ||
743 | new.add(intern(i)) | ||
744 | return new | ||
745 | |||
746 | def compress_keys(self, data): | ||
747 | # Override in subclasses if desired | ||
748 | return | ||
749 | |||
750 | def create_cachedata(self): | ||
751 | data = [{}] | ||
752 | return data | ||
753 | |||
754 | def save_extras(self, d): | ||
755 | if not self.cachefile: | ||
756 | return | ||
757 | |||
758 | glf = bb.utils.lockfile(self.cachefile + ".lock", shared=True) | ||
759 | |||
760 | i = os.getpid() | ||
761 | lf = None | ||
762 | while not lf: | ||
763 | lf = bb.utils.lockfile(self.cachefile + ".lock." + str(i), retry=False) | ||
764 | if not lf or os.path.exists(self.cachefile + "-" + str(i)): | ||
765 | if lf: | ||
766 | bb.utils.unlockfile(lf) | ||
767 | lf = None | ||
768 | i = i + 1 | ||
769 | continue | ||
770 | |||
771 | p = pickle.Pickler(file(self.cachefile + "-" + str(i), "wb"), -1) | ||
772 | p.dump([self.cachedata_extras, self.__class__.CACHE_VERSION]) | ||
773 | |||
774 | bb.utils.unlockfile(lf) | ||
775 | bb.utils.unlockfile(glf) | ||
776 | |||
777 | def merge_data(self, source, dest): | ||
778 | for j in range(0,len(dest)): | ||
779 | for h in source[j]: | ||
780 | if h not in dest[j]: | ||
781 | dest[j][h] = source[j][h] | ||
782 | |||
783 | def save_merge(self, d): | ||
784 | if not self.cachefile: | ||
785 | return | ||
786 | |||
787 | glf = bb.utils.lockfile(self.cachefile + ".lock") | ||
788 | |||
789 | try: | ||
790 | p = pickle.Unpickler(file(self.cachefile, "rb")) | ||
791 | data, version = p.load() | ||
792 | except (IOError, EOFError): | ||
793 | data, version = None, None | ||
794 | |||
795 | if version != self.__class__.CACHE_VERSION: | ||
796 | data = self.create_cachedata() | ||
797 | |||
798 | for f in [y for y in os.listdir(os.path.dirname(self.cachefile)) if y.startswith(os.path.basename(self.cachefile) + '-')]: | ||
799 | f = os.path.join(os.path.dirname(self.cachefile), f) | ||
800 | try: | ||
801 | p = pickle.Unpickler(file(f, "rb")) | ||
802 | extradata, version = p.load() | ||
803 | except (IOError, EOFError): | ||
804 | extradata, version = self.create_cachedata(), None | ||
805 | |||
806 | if version != self.__class__.CACHE_VERSION: | ||
807 | continue | ||
808 | |||
809 | self.merge_data(extradata, data) | ||
810 | os.unlink(f) | ||
811 | |||
812 | self.compress_keys(data) | ||
813 | |||
814 | p = pickle.Pickler(file(self.cachefile, "wb"), -1) | ||
815 | p.dump([data, self.__class__.CACHE_VERSION]) | ||
816 | |||
817 | bb.utils.unlockfile(glf) | ||
818 | |||
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py index af2e19411c..d7d3f513d9 100644 --- a/bitbake/lib/bb/codeparser.py +++ b/bitbake/lib/bb/codeparser.py | |||
@@ -5,10 +5,10 @@ import os.path | |||
5 | import bb.utils, bb.data | 5 | import bb.utils, bb.data |
6 | from itertools import chain | 6 | from itertools import chain |
7 | from pysh import pyshyacc, pyshlex, sherrors | 7 | from pysh import pyshyacc, pyshlex, sherrors |
8 | from bb.cache import MultiProcessCache | ||
8 | 9 | ||
9 | 10 | ||
10 | logger = logging.getLogger('BitBake.CodeParser') | 11 | logger = logging.getLogger('BitBake.CodeParser') |
11 | PARSERCACHE_VERSION = 2 | ||
12 | 12 | ||
13 | try: | 13 | try: |
14 | import cPickle as pickle | 14 | import cPickle as pickle |
@@ -32,133 +32,56 @@ def check_indent(codestr): | |||
32 | 32 | ||
33 | return codestr | 33 | return codestr |
34 | 34 | ||
35 | pythonparsecache = {} | ||
36 | shellparsecache = {} | ||
37 | pythonparsecacheextras = {} | ||
38 | shellparsecacheextras = {} | ||
39 | 35 | ||
40 | 36 | class CodeParserCache(MultiProcessCache): | |
41 | def parser_cachefile(d): | 37 | cache_file_name = "bb_codeparser.dat" |
42 | cachedir = (d.getVar("PERSISTENT_DIR", True) or | 38 | CACHE_VERSION = 2 |
43 | d.getVar("CACHE", True)) | 39 | |
44 | if cachedir in [None, '']: | 40 | def __init__(self): |
45 | return None | 41 | MultiProcessCache.__init__(self) |
46 | bb.utils.mkdirhier(cachedir) | 42 | self.pythoncache = self.cachedata[0] |
47 | cachefile = os.path.join(cachedir, "bb_codeparser.dat") | 43 | self.shellcache = self.cachedata[1] |
48 | logger.debug(1, "Using cache in '%s' for codeparser cache", cachefile) | 44 | self.pythoncacheextras = self.cachedata_extras[0] |
49 | return cachefile | 45 | self.shellcacheextras = self.cachedata_extras[1] |
50 | 46 | ||
51 | def parser_cache_init(d): | 47 | def init_cache(self, d): |
52 | global pythonparsecache | 48 | MultiProcessCache.init_cache(self, d) |
53 | global shellparsecache | 49 | |
54 | 50 | # cachedata gets re-assigned in the parent | |
55 | cachefile = parser_cachefile(d) | 51 | self.pythoncache = self.cachedata[0] |
56 | if not cachefile: | 52 | self.shellcache = self.cachedata[1] |
53 | |||
54 | def compress_keys(self, data): | ||
55 | # When the dicts are originally created, python calls intern() on the set keys | ||
56 | # which significantly improves memory usage. Sadly the pickle/unpickle process | ||
57 | # doesn't call intern() on the keys and results in the same strings being duplicated | ||
58 | # in memory. This also means pickle will save the same string multiple times in | ||
59 | # the cache file. By interning the data here, the cache file shrinks dramatically | ||
60 | # meaning faster load times and the reloaded cache files also consume much less | ||
61 | # memory. This is worth any performance hit from this loops and the use of the | ||
62 | # intern() data storage. | ||
63 | # Python 3.x may behave better in this area | ||
64 | for h in data[0]: | ||
65 | data[0][h]["refs"] = self.internSet(data[0][h]["refs"]) | ||
66 | data[0][h]["execs"] = self.internSet(data[0][h]["execs"]) | ||
67 | for h in data[1]: | ||
68 | data[1][h]["execs"] = self.internSet(data[1][h]["execs"]) | ||
57 | return | 69 | return |
58 | 70 | ||
59 | try: | 71 | def create_cachedata(self): |
60 | p = pickle.Unpickler(file(cachefile, "rb")) | 72 | data = [{}, {}] |
61 | data, version = p.load() | 73 | return data |
62 | except: | ||
63 | return | ||
64 | 74 | ||
65 | if version != PARSERCACHE_VERSION: | 75 | codeparsercache = CodeParserCache() |
66 | return | ||
67 | 76 | ||
68 | pythonparsecache = data[0] | 77 | def parser_cache_init(d): |
69 | shellparsecache = data[1] | 78 | codeparsercache.init_cache(d) |
70 | 79 | ||
71 | def parser_cache_save(d): | 80 | def parser_cache_save(d): |
72 | cachefile = parser_cachefile(d) | 81 | codeparsercache.save_extras(d) |
73 | if not cachefile: | ||
74 | return | ||
75 | |||
76 | glf = bb.utils.lockfile(cachefile + ".lock", shared=True) | ||
77 | |||
78 | i = os.getpid() | ||
79 | lf = None | ||
80 | while not lf: | ||
81 | shellcache = {} | ||
82 | pythoncache = {} | ||
83 | |||
84 | lf = bb.utils.lockfile(cachefile + ".lock." + str(i), retry=False) | ||
85 | if not lf or os.path.exists(cachefile + "-" + str(i)): | ||
86 | if lf: | ||
87 | bb.utils.unlockfile(lf) | ||
88 | lf = None | ||
89 | i = i + 1 | ||
90 | continue | ||
91 | |||
92 | shellcache = shellparsecacheextras | ||
93 | pythoncache = pythonparsecacheextras | ||
94 | |||
95 | p = pickle.Pickler(file(cachefile + "-" + str(i), "wb"), -1) | ||
96 | p.dump([[pythoncache, shellcache], PARSERCACHE_VERSION]) | ||
97 | |||
98 | bb.utils.unlockfile(lf) | ||
99 | bb.utils.unlockfile(glf) | ||
100 | |||
101 | def internSet(items): | ||
102 | new = set() | ||
103 | for i in items: | ||
104 | new.add(intern(i)) | ||
105 | return new | ||
106 | 82 | ||
107 | def parser_cache_savemerge(d): | 83 | def parser_cache_savemerge(d): |
108 | cachefile = parser_cachefile(d) | 84 | codeparsercache.save_merge(d) |
109 | if not cachefile: | ||
110 | return | ||
111 | |||
112 | glf = bb.utils.lockfile(cachefile + ".lock") | ||
113 | |||
114 | try: | ||
115 | p = pickle.Unpickler(file(cachefile, "rb")) | ||
116 | data, version = p.load() | ||
117 | except (IOError, EOFError): | ||
118 | data, version = None, None | ||
119 | |||
120 | if version != PARSERCACHE_VERSION: | ||
121 | data = [{}, {}] | ||
122 | |||
123 | for f in [y for y in os.listdir(os.path.dirname(cachefile)) if y.startswith(os.path.basename(cachefile) + '-')]: | ||
124 | f = os.path.join(os.path.dirname(cachefile), f) | ||
125 | try: | ||
126 | p = pickle.Unpickler(file(f, "rb")) | ||
127 | extradata, version = p.load() | ||
128 | except (IOError, EOFError): | ||
129 | extradata, version = [{}, {}], None | ||
130 | |||
131 | if version != PARSERCACHE_VERSION: | ||
132 | continue | ||
133 | |||
134 | for h in extradata[0]: | ||
135 | if h not in data[0]: | ||
136 | data[0][h] = extradata[0][h] | ||
137 | for h in extradata[1]: | ||
138 | if h not in data[1]: | ||
139 | data[1][h] = extradata[1][h] | ||
140 | os.unlink(f) | ||
141 | |||
142 | # When the dicts are originally created, python calls intern() on the set keys | ||
143 | # which significantly improves memory usage. Sadly the pickle/unpickle process | ||
144 | # doesn't call intern() on the keys and results in the same strings being duplicated | ||
145 | # in memory. This also means pickle will save the same string multiple times in | ||
146 | # the cache file. By interning the data here, the cache file shrinks dramatically | ||
147 | # meaning faster load times and the reloaded cache files also consume much less | ||
148 | # memory. This is worth any performance hit from this loops and the use of the | ||
149 | # intern() data storage. | ||
150 | # Python 3.x may behave better in this area | ||
151 | for h in data[0]: | ||
152 | data[0][h]["refs"] = internSet(data[0][h]["refs"]) | ||
153 | data[0][h]["execs"] = internSet(data[0][h]["execs"]) | ||
154 | for h in data[1]: | ||
155 | data[1][h]["execs"] = internSet(data[1][h]["execs"]) | ||
156 | |||
157 | p = pickle.Pickler(file(cachefile, "wb"), -1) | ||
158 | p.dump([data, PARSERCACHE_VERSION]) | ||
159 | |||
160 | bb.utils.unlockfile(glf) | ||
161 | |||
162 | 85 | ||
163 | Logger = logging.getLoggerClass() | 86 | Logger = logging.getLoggerClass() |
164 | class BufferedLogger(Logger): | 87 | class BufferedLogger(Logger): |
@@ -235,14 +158,14 @@ class PythonParser(): | |||
235 | def parse_python(self, node): | 158 | def parse_python(self, node): |
236 | h = hash(str(node)) | 159 | h = hash(str(node)) |
237 | 160 | ||
238 | if h in pythonparsecache: | 161 | if h in codeparsercache.pythoncache: |
239 | self.references = pythonparsecache[h]["refs"] | 162 | self.references = codeparsercache.pythoncache[h]["refs"] |
240 | self.execs = pythonparsecache[h]["execs"] | 163 | self.execs = codeparsercache.pythoncache[h]["execs"] |
241 | return | 164 | return |
242 | 165 | ||
243 | if h in pythonparsecacheextras: | 166 | if h in codeparsercache.pythoncacheextras: |
244 | self.references = pythonparsecacheextras[h]["refs"] | 167 | self.references = codeparsercache.pythoncacheextras[h]["refs"] |
245 | self.execs = pythonparsecacheextras[h]["execs"] | 168 | self.execs = codeparsercache.pythoncacheextras[h]["execs"] |
246 | return | 169 | return |
247 | 170 | ||
248 | 171 | ||
@@ -256,9 +179,9 @@ class PythonParser(): | |||
256 | self.references.update(self.var_references) | 179 | self.references.update(self.var_references) |
257 | self.references.update(self.var_execs) | 180 | self.references.update(self.var_execs) |
258 | 181 | ||
259 | pythonparsecacheextras[h] = {} | 182 | codeparsercache.pythoncacheextras[h] = {} |
260 | pythonparsecacheextras[h]["refs"] = self.references | 183 | codeparsercache.pythoncacheextras[h]["refs"] = self.references |
261 | pythonparsecacheextras[h]["execs"] = self.execs | 184 | codeparsercache.pythoncacheextras[h]["execs"] = self.execs |
262 | 185 | ||
263 | class ShellParser(): | 186 | class ShellParser(): |
264 | def __init__(self, name, log): | 187 | def __init__(self, name, log): |
@@ -276,12 +199,12 @@ class ShellParser(): | |||
276 | 199 | ||
277 | h = hash(str(value)) | 200 | h = hash(str(value)) |
278 | 201 | ||
279 | if h in shellparsecache: | 202 | if h in codeparsercache.shellcache: |
280 | self.execs = shellparsecache[h]["execs"] | 203 | self.execs = codeparsercache.shellcache[h]["execs"] |
281 | return self.execs | 204 | return self.execs |
282 | 205 | ||
283 | if h in shellparsecacheextras: | 206 | if h in codeparsercache.shellcacheextras: |
284 | self.execs = shellparsecacheextras[h]["execs"] | 207 | self.execs = codeparsercache.shellcacheextras[h]["execs"] |
285 | return self.execs | 208 | return self.execs |
286 | 209 | ||
287 | try: | 210 | try: |
@@ -293,8 +216,8 @@ class ShellParser(): | |||
293 | self.process_tokens(token) | 216 | self.process_tokens(token) |
294 | self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs) | 217 | self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs) |
295 | 218 | ||
296 | shellparsecacheextras[h] = {} | 219 | codeparsercache.shellcacheextras[h] = {} |
297 | shellparsecacheextras[h]["execs"] = self.execs | 220 | codeparsercache.shellcacheextras[h]["execs"] = self.execs |
298 | 221 | ||
299 | return self.execs | 222 | return self.execs |
300 | 223 | ||