summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Paul Eggleton <paul.eggleton@linux.intel.com> 2012-05-23 00:23:31 +0100
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2012-05-23 11:33:18 +0100
commit: d7b818b51f3e6dded0c0885cdfed5a24cda3b428 (patch)
tree: 6cd18f70cb71682aad55a6079da32c25f60bcbf7
parent: 644b30adfb8fb158a253712033f717aadf6f2c68 (diff)
download: poky-d7b818b51f3e6dded0c0885cdfed5a24cda3b428.tar.gz
bitbake: refactor out codeparser cache into a separate class
We want to be able to reuse most of this functionality for the file checksum cache. (Bitbake rev: 0fe3cb1438d297f90dd0fc6b26362ecbff75c76d) Signed-off-by: Paul Eggleton <paul.eggleton@linux.intel.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- bitbake/lib/bb/cache.py 116
-rw-r--r-- bitbake/lib/bb/codeparser.py 191
2 files changed, 171 insertions, 136 deletions
diff --git a/bitbake/lib/bb/cache.py b/bitbake/lib/bb/cache.py
index 47e814b577..36e6356f51 100644
--- a/bitbake/lib/bb/cache.py
+++ b/bitbake/lib/bb/cache.py
@@ -1,11 +1,12 @@
1# ex:ts=4:sw=4:sts=4:et 1# ex:ts=4:sw=4:sts=4:et
2# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- 2# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
3# 3#
4# BitBake 'Event' implementation 4# BitBake Cache implementation
5# 5#
6# Caching of bitbake variables before task execution 6# Caching of bitbake variables before task execution
7 7
8# Copyright (C) 2006 Richard Purdie 8# Copyright (C) 2006 Richard Purdie
9# Copyright (C) 2012 Intel Corporation
9 10
10# but small sections based on code from bin/bitbake: 11# but small sections based on code from bin/bitbake:
11# Copyright (C) 2003, 2004 Chris Larson 12# Copyright (C) 2003, 2004 Chris Larson
@@ -703,4 +704,115 @@ class CacheData(object):
703 for info in info_array: 704 for info in info_array:
704 info.add_cacheData(self, fn) 705 info.add_cacheData(self, fn)
705 706
706 707
708class MultiProcessCache(object):
709 """
710 BitBake multi-process cache implementation
711
712 Used by the codeparser & file checksum caches
713 """
714
715 def __init__(self):
716 self.cachefile = None
717 self.cachedata = self.create_cachedata()
718 self.cachedata_extras = self.create_cachedata()
719
720 def init_cache(self, d):
721 cachedir = (d.getVar("PERSISTENT_DIR", True) or
722 d.getVar("CACHE", True))
723 if cachedir in [None, '']:
724 return
725 bb.utils.mkdirhier(cachedir)
726 self.cachefile = os.path.join(cachedir, self.__class__.cache_file_name)
727 logger.debug(1, "Using cache in '%s'", self.cachefile)
728
729 try:
730 p = pickle.Unpickler(file(self.cachefile, "rb"))
731 data, version = p.load()
732 except:
733 return
734
735 if version != self.__class__.CACHE_VERSION:
736 return
737
738 self.cachedata = data
739
740 def internSet(self, items):
741 new = set()
742 for i in items:
743 new.add(intern(i))
744 return new
745
746 def compress_keys(self, data):
747 # Override in subclasses if desired
748 return
749
750 def create_cachedata(self):
751 data = [{}]
752 return data
753
754 def save_extras(self, d):
755 if not self.cachefile:
756 return
757
758 glf = bb.utils.lockfile(self.cachefile + ".lock", shared=True)
759
760 i = os.getpid()
761 lf = None
762 while not lf:
763 lf = bb.utils.lockfile(self.cachefile + ".lock." + str(i), retry=False)
764 if not lf or os.path.exists(self.cachefile + "-" + str(i)):
765 if lf:
766 bb.utils.unlockfile(lf)
767 lf = None
768 i = i + 1
769 continue
770
771 p = pickle.Pickler(file(self.cachefile + "-" + str(i), "wb"), -1)
772 p.dump([self.cachedata_extras, self.__class__.CACHE_VERSION])
773
774 bb.utils.unlockfile(lf)
775 bb.utils.unlockfile(glf)
776
777 def merge_data(self, source, dest):
778 for j in range(0,len(dest)):
779 for h in source[j]:
780 if h not in dest[j]:
781 dest[j][h] = source[j][h]
782
783 def save_merge(self, d):
784 if not self.cachefile:
785 return
786
787 glf = bb.utils.lockfile(self.cachefile + ".lock")
788
789 try:
790 p = pickle.Unpickler(file(self.cachefile, "rb"))
791 data, version = p.load()
792 except (IOError, EOFError):
793 data, version = None, None
794
795 if version != self.__class__.CACHE_VERSION:
796 data = self.create_cachedata()
797
798 for f in [y for y in os.listdir(os.path.dirname(self.cachefile)) if y.startswith(os.path.basename(self.cachefile) + '-')]:
799 f = os.path.join(os.path.dirname(self.cachefile), f)
800 try:
801 p = pickle.Unpickler(file(f, "rb"))
802 extradata, version = p.load()
803 except (IOError, EOFError):
804 extradata, version = self.create_cachedata(), None
805
806 if version != self.__class__.CACHE_VERSION:
807 continue
808
809 self.merge_data(extradata, data)
810 os.unlink(f)
811
812 self.compress_keys(data)
813
814 p = pickle.Pickler(file(self.cachefile, "wb"), -1)
815 p.dump([data, self.__class__.CACHE_VERSION])
816
817 bb.utils.unlockfile(glf)
818
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
index af2e19411c..d7d3f513d9 100644
--- a/bitbake/lib/bb/codeparser.py
+++ b/bitbake/lib/bb/codeparser.py
@@ -5,10 +5,10 @@ import os.path
5import bb.utils, bb.data 5import bb.utils, bb.data
6from itertools import chain 6from itertools import chain
7from pysh import pyshyacc, pyshlex, sherrors 7from pysh import pyshyacc, pyshlex, sherrors
8from bb.cache import MultiProcessCache
8 9
9 10
10logger = logging.getLogger('BitBake.CodeParser') 11logger = logging.getLogger('BitBake.CodeParser')
11PARSERCACHE_VERSION = 2
12 12
13try: 13try:
14 import cPickle as pickle 14 import cPickle as pickle
@@ -32,133 +32,56 @@ def check_indent(codestr):
32 32
33 return codestr 33 return codestr
34 34
35pythonparsecache = {}
36shellparsecache = {}
37pythonparsecacheextras = {}
38shellparsecacheextras = {}
39 35
40 36class CodeParserCache(MultiProcessCache):
41def parser_cachefile(d): 37 cache_file_name = "bb_codeparser.dat"
42 cachedir = (d.getVar("PERSISTENT_DIR", True) or 38 CACHE_VERSION = 2
43 d.getVar("CACHE", True)) 39
44 if cachedir in [None, '']: 40 def __init__(self):
45 return None 41 MultiProcessCache.__init__(self)
46 bb.utils.mkdirhier(cachedir) 42 self.pythoncache = self.cachedata[0]
47 cachefile = os.path.join(cachedir, "bb_codeparser.dat") 43 self.shellcache = self.cachedata[1]
48 logger.debug(1, "Using cache in '%s' for codeparser cache", cachefile) 44 self.pythoncacheextras = self.cachedata_extras[0]
49 return cachefile 45 self.shellcacheextras = self.cachedata_extras[1]
50 46
51def parser_cache_init(d): 47 def init_cache(self, d):
52 global pythonparsecache 48 MultiProcessCache.init_cache(self, d)
53 global shellparsecache 49
54 50 # cachedata gets re-assigned in the parent
55 cachefile = parser_cachefile(d) 51 self.pythoncache = self.cachedata[0]
56 if not cachefile: 52 self.shellcache = self.cachedata[1]
53
54 def compress_keys(self, data):
55 # When the dicts are originally created, python calls intern() on the set keys
56 # which significantly improves memory usage. Sadly the pickle/unpickle process
57 # doesn't call intern() on the keys and results in the same strings being duplicated
58 # in memory. This also means pickle will save the same string multiple times in
59 # the cache file. By interning the data here, the cache file shrinks dramatically
60 # meaning faster load times and the reloaded cache files also consume much less
61 # memory. This is worth any performance hit from this loops and the use of the
62 # intern() data storage.
63 # Python 3.x may behave better in this area
64 for h in data[0]:
65 data[0][h]["refs"] = self.internSet(data[0][h]["refs"])
66 data[0][h]["execs"] = self.internSet(data[0][h]["execs"])
67 for h in data[1]:
68 data[1][h]["execs"] = self.internSet(data[1][h]["execs"])
57 return 69 return
58 70
59 try: 71 def create_cachedata(self):
60 p = pickle.Unpickler(file(cachefile, "rb")) 72 data = [{}, {}]
61 data, version = p.load() 73 return data
62 except:
63 return
64 74
65 if version != PARSERCACHE_VERSION: 75codeparsercache = CodeParserCache()
66 return
67 76
68 pythonparsecache = data[0] 77def parser_cache_init(d):
69 shellparsecache = data[1] 78 codeparsercache.init_cache(d)
70 79
71def parser_cache_save(d): 80def parser_cache_save(d):
72 cachefile = parser_cachefile(d) 81 codeparsercache.save_extras(d)
73 if not cachefile:
74 return
75
76 glf = bb.utils.lockfile(cachefile + ".lock", shared=True)
77
78 i = os.getpid()
79 lf = None
80 while not lf:
81 shellcache = {}
82 pythoncache = {}
83
84 lf = bb.utils.lockfile(cachefile + ".lock." + str(i), retry=False)
85 if not lf or os.path.exists(cachefile + "-" + str(i)):
86 if lf:
87 bb.utils.unlockfile(lf)
88 lf = None
89 i = i + 1
90 continue
91
92 shellcache = shellparsecacheextras
93 pythoncache = pythonparsecacheextras
94
95 p = pickle.Pickler(file(cachefile + "-" + str(i), "wb"), -1)
96 p.dump([[pythoncache, shellcache], PARSERCACHE_VERSION])
97
98 bb.utils.unlockfile(lf)
99 bb.utils.unlockfile(glf)
100
101def internSet(items):
102 new = set()
103 for i in items:
104 new.add(intern(i))
105 return new
106 82
107def parser_cache_savemerge(d): 83def parser_cache_savemerge(d):
108 cachefile = parser_cachefile(d) 84 codeparsercache.save_merge(d)
109 if not cachefile:
110 return
111
112 glf = bb.utils.lockfile(cachefile + ".lock")
113
114 try:
115 p = pickle.Unpickler(file(cachefile, "rb"))
116 data, version = p.load()
117 except (IOError, EOFError):
118 data, version = None, None
119
120 if version != PARSERCACHE_VERSION:
121 data = [{}, {}]
122
123 for f in [y for y in os.listdir(os.path.dirname(cachefile)) if y.startswith(os.path.basename(cachefile) + '-')]:
124 f = os.path.join(os.path.dirname(cachefile), f)
125 try:
126 p = pickle.Unpickler(file(f, "rb"))
127 extradata, version = p.load()
128 except (IOError, EOFError):
129 extradata, version = [{}, {}], None
130
131 if version != PARSERCACHE_VERSION:
132 continue
133
134 for h in extradata[0]:
135 if h not in data[0]:
136 data[0][h] = extradata[0][h]
137 for h in extradata[1]:
138 if h not in data[1]:
139 data[1][h] = extradata[1][h]
140 os.unlink(f)
141
142 # When the dicts are originally created, python calls intern() on the set keys
143 # which significantly improves memory usage. Sadly the pickle/unpickle process
144 # doesn't call intern() on the keys and results in the same strings being duplicated
145 # in memory. This also means pickle will save the same string multiple times in
146 # the cache file. By interning the data here, the cache file shrinks dramatically
147 # meaning faster load times and the reloaded cache files also consume much less
148 # memory. This is worth any performance hit from this loops and the use of the
149 # intern() data storage.
150 # Python 3.x may behave better in this area
151 for h in data[0]:
152 data[0][h]["refs"] = internSet(data[0][h]["refs"])
153 data[0][h]["execs"] = internSet(data[0][h]["execs"])
154 for h in data[1]:
155 data[1][h]["execs"] = internSet(data[1][h]["execs"])
156
157 p = pickle.Pickler(file(cachefile, "wb"), -1)
158 p.dump([data, PARSERCACHE_VERSION])
159
160 bb.utils.unlockfile(glf)
161
162 85
163Logger = logging.getLoggerClass() 86Logger = logging.getLoggerClass()
164class BufferedLogger(Logger): 87class BufferedLogger(Logger):
@@ -235,14 +158,14 @@ class PythonParser():
235 def parse_python(self, node): 158 def parse_python(self, node):
236 h = hash(str(node)) 159 h = hash(str(node))
237 160
238 if h in pythonparsecache: 161 if h in codeparsercache.pythoncache:
239 self.references = pythonparsecache[h]["refs"] 162 self.references = codeparsercache.pythoncache[h]["refs"]
240 self.execs = pythonparsecache[h]["execs"] 163 self.execs = codeparsercache.pythoncache[h]["execs"]
241 return 164 return
242 165
243 if h in pythonparsecacheextras: 166 if h in codeparsercache.pythoncacheextras:
244 self.references = pythonparsecacheextras[h]["refs"] 167 self.references = codeparsercache.pythoncacheextras[h]["refs"]
245 self.execs = pythonparsecacheextras[h]["execs"] 168 self.execs = codeparsercache.pythoncacheextras[h]["execs"]
246 return 169 return
247 170
248 171
@@ -256,9 +179,9 @@ class PythonParser():
256 self.references.update(self.var_references) 179 self.references.update(self.var_references)
257 self.references.update(self.var_execs) 180 self.references.update(self.var_execs)
258 181
259 pythonparsecacheextras[h] = {} 182 codeparsercache.pythoncacheextras[h] = {}
260 pythonparsecacheextras[h]["refs"] = self.references 183 codeparsercache.pythoncacheextras[h]["refs"] = self.references
261 pythonparsecacheextras[h]["execs"] = self.execs 184 codeparsercache.pythoncacheextras[h]["execs"] = self.execs
262 185
263class ShellParser(): 186class ShellParser():
264 def __init__(self, name, log): 187 def __init__(self, name, log):
@@ -276,12 +199,12 @@ class ShellParser():
276 199
277 h = hash(str(value)) 200 h = hash(str(value))
278 201
279 if h in shellparsecache: 202 if h in codeparsercache.shellcache:
280 self.execs = shellparsecache[h]["execs"] 203 self.execs = codeparsercache.shellcache[h]["execs"]
281 return self.execs 204 return self.execs
282 205
283 if h in shellparsecacheextras: 206 if h in codeparsercache.shellcacheextras:
284 self.execs = shellparsecacheextras[h]["execs"] 207 self.execs = codeparsercache.shellcacheextras[h]["execs"]
285 return self.execs 208 return self.execs
286 209
287 try: 210 try:
@@ -293,8 +216,8 @@ class ShellParser():
293 self.process_tokens(token) 216 self.process_tokens(token)
294 self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs) 217 self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)
295 218
296 shellparsecacheextras[h] = {} 219 codeparsercache.shellcacheextras[h] = {}
297 shellparsecacheextras[h]["execs"] = self.execs 220 codeparsercache.shellcacheextras[h]["execs"] = self.execs
298 221
299 return self.execs 222 return self.execs
300 223