diff options
author | Paul Eggleton <paul.eggleton@linux.intel.com> | 2012-05-23 00:23:31 +0100 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2012-05-23 11:33:18 +0100 |
commit | d7b818b51f3e6dded0c0885cdfed5a24cda3b428 (patch) | |
tree | 6cd18f70cb71682aad55a6079da32c25f60bcbf7 | |
parent | 644b30adfb8fb158a253712033f717aadf6f2c68 (diff) | |
download | poky-d7b818b51f3e6dded0c0885cdfed5a24cda3b428.tar.gz |
bitbake: refactor out codeparser cache into a separate class
We want to be able to reuse most of this functionality for the file
checksum cache.
(Bitbake rev: 0fe3cb1438d297f90dd0fc6b26362ecbff75c76d)
Signed-off-by: Paul Eggleton <paul.eggleton@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- | bitbake/lib/bb/cache.py | 116 | ||||
-rw-r--r-- | bitbake/lib/bb/codeparser.py | 191 |
2 files changed, 171 insertions, 136 deletions
diff --git a/bitbake/lib/bb/cache.py b/bitbake/lib/bb/cache.py index 47e814b577..36e6356f51 100644 --- a/bitbake/lib/bb/cache.py +++ b/bitbake/lib/bb/cache.py | |||
@@ -1,11 +1,12 @@ | |||
1 | # ex:ts=4:sw=4:sts=4:et | 1 | # ex:ts=4:sw=4:sts=4:et |
2 | # -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- | 2 | # -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- |
3 | # | 3 | # |
4 | # BitBake 'Event' implementation | 4 | # BitBake Cache implementation |
5 | # | 5 | # |
6 | # Caching of bitbake variables before task execution | 6 | # Caching of bitbake variables before task execution |
7 | 7 | ||
8 | # Copyright (C) 2006 Richard Purdie | 8 | # Copyright (C) 2006 Richard Purdie |
9 | # Copyright (C) 2012 Intel Corporation | ||
9 | 10 | ||
10 | # but small sections based on code from bin/bitbake: | 11 | # but small sections based on code from bin/bitbake: |
11 | # Copyright (C) 2003, 2004 Chris Larson | 12 | # Copyright (C) 2003, 2004 Chris Larson |
@@ -703,4 +704,115 @@ class CacheData(object): | |||
703 | for info in info_array: | 704 | for info in info_array: |
704 | info.add_cacheData(self, fn) | 705 | info.add_cacheData(self, fn) |
705 | 706 | ||
706 | 707 | ||
708 | class MultiProcessCache(object): | ||
709 | """ | ||
710 | BitBake multi-process cache implementation | ||
711 | |||
712 | Used by the codeparser & file checksum caches | ||
713 | """ | ||
714 | |||
715 | def __init__(self): | ||
716 | self.cachefile = None | ||
717 | self.cachedata = self.create_cachedata() | ||
718 | self.cachedata_extras = self.create_cachedata() | ||
719 | |||
720 | def init_cache(self, d): | ||
721 | cachedir = (d.getVar("PERSISTENT_DIR", True) or | ||
722 | d.getVar("CACHE", True)) | ||
723 | if cachedir in [None, '']: | ||
724 | return | ||
725 | bb.utils.mkdirhier(cachedir) | ||
726 | self.cachefile = os.path.join(cachedir, self.__class__.cache_file_name) | ||
727 | logger.debug(1, "Using cache in '%s'", self.cachefile) | ||
728 | |||
729 | try: | ||
730 | p = pickle.Unpickler(file(self.cachefile, "rb")) | ||
731 | data, version = p.load() | ||
732 | except: | ||
733 | return | ||
734 | |||
735 | if version != self.__class__.CACHE_VERSION: | ||
736 | return | ||
737 | |||
738 | self.cachedata = data | ||
739 | |||
740 | def internSet(self, items): | ||
741 | new = set() | ||
742 | for i in items: | ||
743 | new.add(intern(i)) | ||
744 | return new | ||
745 | |||
746 | def compress_keys(self, data): | ||
747 | # Override in subclasses if desired | ||
748 | return | ||
749 | |||
750 | def create_cachedata(self): | ||
751 | data = [{}] | ||
752 | return data | ||
753 | |||
754 | def save_extras(self, d): | ||
755 | if not self.cachefile: | ||
756 | return | ||
757 | |||
758 | glf = bb.utils.lockfile(self.cachefile + ".lock", shared=True) | ||
759 | |||
760 | i = os.getpid() | ||
761 | lf = None | ||
762 | while not lf: | ||
763 | lf = bb.utils.lockfile(self.cachefile + ".lock." + str(i), retry=False) | ||
764 | if not lf or os.path.exists(self.cachefile + "-" + str(i)): | ||
765 | if lf: | ||
766 | bb.utils.unlockfile(lf) | ||
767 | lf = None | ||
768 | i = i + 1 | ||
769 | continue | ||
770 | |||
771 | p = pickle.Pickler(file(self.cachefile + "-" + str(i), "wb"), -1) | ||
772 | p.dump([self.cachedata_extras, self.__class__.CACHE_VERSION]) | ||
773 | |||
774 | bb.utils.unlockfile(lf) | ||
775 | bb.utils.unlockfile(glf) | ||
776 | |||
777 | def merge_data(self, source, dest): | ||
778 | for j in range(0,len(dest)): | ||
779 | for h in source[j]: | ||
780 | if h not in dest[j]: | ||
781 | dest[j][h] = source[j][h] | ||
782 | |||
783 | def save_merge(self, d): | ||
784 | if not self.cachefile: | ||
785 | return | ||
786 | |||
787 | glf = bb.utils.lockfile(self.cachefile + ".lock") | ||
788 | |||
789 | try: | ||
790 | p = pickle.Unpickler(file(self.cachefile, "rb")) | ||
791 | data, version = p.load() | ||
792 | except (IOError, EOFError): | ||
793 | data, version = None, None | ||
794 | |||
795 | if version != self.__class__.CACHE_VERSION: | ||
796 | data = self.create_cachedata() | ||
797 | |||
798 | for f in [y for y in os.listdir(os.path.dirname(self.cachefile)) if y.startswith(os.path.basename(self.cachefile) + '-')]: | ||
799 | f = os.path.join(os.path.dirname(self.cachefile), f) | ||
800 | try: | ||
801 | p = pickle.Unpickler(file(f, "rb")) | ||
802 | extradata, version = p.load() | ||
803 | except (IOError, EOFError): | ||
804 | extradata, version = self.create_cachedata(), None | ||
805 | |||
806 | if version != self.__class__.CACHE_VERSION: | ||
807 | continue | ||
808 | |||
809 | self.merge_data(extradata, data) | ||
810 | os.unlink(f) | ||
811 | |||
812 | self.compress_keys(data) | ||
813 | |||
814 | p = pickle.Pickler(file(self.cachefile, "wb"), -1) | ||
815 | p.dump([data, self.__class__.CACHE_VERSION]) | ||
816 | |||
817 | bb.utils.unlockfile(glf) | ||
818 | |||
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py index af2e19411c..d7d3f513d9 100644 --- a/bitbake/lib/bb/codeparser.py +++ b/bitbake/lib/bb/codeparser.py | |||
@@ -5,10 +5,10 @@ import os.path | |||
5 | import bb.utils, bb.data | 5 | import bb.utils, bb.data |
6 | from itertools import chain | 6 | from itertools import chain |
7 | from pysh import pyshyacc, pyshlex, sherrors | 7 | from pysh import pyshyacc, pyshlex, sherrors |
8 | from bb.cache import MultiProcessCache | ||
8 | 9 | ||
9 | 10 | ||
10 | logger = logging.getLogger('BitBake.CodeParser') | 11 | logger = logging.getLogger('BitBake.CodeParser') |
11 | PARSERCACHE_VERSION = 2 | ||
12 | 12 | ||
13 | try: | 13 | try: |
14 | import cPickle as pickle | 14 | import cPickle as pickle |
@@ -32,133 +32,56 @@ def check_indent(codestr): | |||
32 | 32 | ||
33 | return codestr | 33 | return codestr |
34 | 34 | ||
35 | pythonparsecache = {} | ||
36 | shellparsecache = {} | ||
37 | pythonparsecacheextras = {} | ||
38 | shellparsecacheextras = {} | ||
39 | 35 | ||
40 | 36 | class CodeParserCache(MultiProcessCache): | |
41 | def parser_cachefile(d): | 37 | cache_file_name = "bb_codeparser.dat" |
42 | cachedir = (d.getVar("PERSISTENT_DIR", True) or | 38 | CACHE_VERSION = 2 |
43 | d.getVar("CACHE", True)) | 39 | |
44 | if cachedir in [None, '']: | 40 | def __init__(self): |
45 | return None | 41 | MultiProcessCache.__init__(self) |
46 | bb.utils.mkdirhier(cachedir) | 42 | self.pythoncache = self.cachedata[0] |
47 | cachefile = os.path.join(cachedir, "bb_codeparser.dat") | 43 | self.shellcache = self.cachedata[1] |
48 | logger.debug(1, "Using cache in '%s' for codeparser cache", cachefile) | 44 | self.pythoncacheextras = self.cachedata_extras[0] |
49 | return cachefile | 45 | self.shellcacheextras = self.cachedata_extras[1] |
50 | 46 | ||
51 | def parser_cache_init(d): | 47 | def init_cache(self, d): |
52 | global pythonparsecache | 48 | MultiProcessCache.init_cache(self, d) |
53 | global shellparsecache | 49 | |
54 | 50 | # cachedata gets re-assigned in the parent | |
55 | cachefile = parser_cachefile(d) | 51 | self.pythoncache = self.cachedata[0] |
56 | if not cachefile: | 52 | self.shellcache = self.cachedata[1] |
53 | |||
54 | def compress_keys(self, data): | ||
55 | # When the dicts are originally created, python calls intern() on the set keys | ||
56 | # which significantly improves memory usage. Sadly the pickle/unpickle process | ||
57 | # doesn't call intern() on the keys and results in the same strings being duplicated | ||
58 | # in memory. This also means pickle will save the same string multiple times in | ||
59 | # the cache file. By interning the data here, the cache file shrinks dramatically | ||
60 | # meaning faster load times and the reloaded cache files also consume much less | ||
61 | # memory. This is worth any performance hit from this loops and the use of the | ||
62 | # intern() data storage. | ||
63 | # Python 3.x may behave better in this area | ||
64 | for h in data[0]: | ||
65 | data[0][h]["refs"] = self.internSet(data[0][h]["refs"]) | ||
66 | data[0][h]["execs"] = self.internSet(data[0][h]["execs"]) | ||
67 | for h in data[1]: | ||
68 | data[1][h]["execs"] = self.internSet(data[1][h]["execs"]) | ||
57 | return | 69 | return |
58 | 70 | ||
59 | try: | 71 | def create_cachedata(self): |
60 | p = pickle.Unpickler(file(cachefile, "rb")) | 72 | data = [{}, {}] |
61 | data, version = p.load() | 73 | return data |
62 | except: | ||
63 | return | ||
64 | 74 | ||
65 | if version != PARSERCACHE_VERSION: | 75 | codeparsercache = CodeParserCache() |
66 | return | ||
67 | 76 | ||
68 | pythonparsecache = data[0] | 77 | def parser_cache_init(d): |
69 | shellparsecache = data[1] | 78 | codeparsercache.init_cache(d) |
70 | 79 | ||
71 | def parser_cache_save(d): | 80 | def parser_cache_save(d): |
72 | cachefile = parser_cachefile(d) | 81 | codeparsercache.save_extras(d) |
73 | if not cachefile: | ||
74 | return | ||
75 | |||
76 | glf = bb.utils.lockfile(cachefile + ".lock", shared=True) | ||
77 | |||
78 | i = os.getpid() | ||
79 | lf = None | ||
80 | while not lf: | ||
81 | shellcache = {} | ||
82 | pythoncache = {} | ||
83 | |||
84 | lf = bb.utils.lockfile(cachefile + ".lock." + str(i), retry=False) | ||
85 | if not lf or os.path.exists(cachefile + "-" + str(i)): | ||
86 | if lf: | ||
87 | bb.utils.unlockfile(lf) | ||
88 | lf = None | ||
89 | i = i + 1 | ||
90 | continue | ||
91 | |||
92 | shellcache = shellparsecacheextras | ||
93 | pythoncache = pythonparsecacheextras | ||
94 | |||
95 | p = pickle.Pickler(file(cachefile + "-" + str(i), "wb"), -1) | ||
96 | p.dump([[pythoncache, shellcache], PARSERCACHE_VERSION]) | ||
97 | |||
98 | bb.utils.unlockfile(lf) | ||
99 | bb.utils.unlockfile(glf) | ||
100 | |||
101 | def internSet(items): | ||
102 | new = set() | ||
103 | for i in items: | ||
104 | new.add(intern(i)) | ||
105 | return new | ||
106 | 82 | ||
107 | def parser_cache_savemerge(d): | 83 | def parser_cache_savemerge(d): |
108 | cachefile = parser_cachefile(d) | 84 | codeparsercache.save_merge(d) |
109 | if not cachefile: | ||
110 | return | ||
111 | |||
112 | glf = bb.utils.lockfile(cachefile + ".lock") | ||
113 | |||
114 | try: | ||
115 | p = pickle.Unpickler(file(cachefile, "rb")) | ||
116 | data, version = p.load() | ||
117 | except (IOError, EOFError): | ||
118 | data, version = None, None | ||
119 | |||
120 | if version != PARSERCACHE_VERSION: | ||
121 | data = [{}, {}] | ||
122 | |||
123 | for f in [y for y in os.listdir(os.path.dirname(cachefile)) if y.startswith(os.path.basename(cachefile) + '-')]: | ||
124 | f = os.path.join(os.path.dirname(cachefile), f) | ||
125 | try: | ||
126 | p = pickle.Unpickler(file(f, "rb")) | ||
127 | extradata, version = p.load() | ||
128 | except (IOError, EOFError): | ||
129 | extradata, version = [{}, {}], None | ||
130 | |||
131 | if version != PARSERCACHE_VERSION: | ||
132 | continue | ||
133 | |||
134 | for h in extradata[0]: | ||
135 | if h not in data[0]: | ||
136 | data[0][h] = extradata[0][h] | ||
137 | for h in extradata[1]: | ||
138 | if h not in data[1]: | ||
139 | data[1][h] = extradata[1][h] | ||
140 | os.unlink(f) | ||
141 | |||
142 | # When the dicts are originally created, python calls intern() on the set keys | ||
143 | # which significantly improves memory usage. Sadly the pickle/unpickle process | ||
144 | # doesn't call intern() on the keys and results in the same strings being duplicated | ||
145 | # in memory. This also means pickle will save the same string multiple times in | ||
146 | # the cache file. By interning the data here, the cache file shrinks dramatically | ||
147 | # meaning faster load times and the reloaded cache files also consume much less | ||
148 | # memory. This is worth any performance hit from this loops and the use of the | ||
149 | # intern() data storage. | ||
150 | # Python 3.x may behave better in this area | ||
151 | for h in data[0]: | ||
152 | data[0][h]["refs"] = internSet(data[0][h]["refs"]) | ||
153 | data[0][h]["execs"] = internSet(data[0][h]["execs"]) | ||
154 | for h in data[1]: | ||
155 | data[1][h]["execs"] = internSet(data[1][h]["execs"]) | ||
156 | |||
157 | p = pickle.Pickler(file(cachefile, "wb"), -1) | ||
158 | p.dump([data, PARSERCACHE_VERSION]) | ||
159 | |||
160 | bb.utils.unlockfile(glf) | ||
161 | |||
162 | 85 | ||
163 | Logger = logging.getLoggerClass() | 86 | Logger = logging.getLoggerClass() |
164 | class BufferedLogger(Logger): | 87 | class BufferedLogger(Logger): |
@@ -235,14 +158,14 @@ class PythonParser(): | |||
235 | def parse_python(self, node): | 158 | def parse_python(self, node): |
236 | h = hash(str(node)) | 159 | h = hash(str(node)) |
237 | 160 | ||
238 | if h in pythonparsecache: | 161 | if h in codeparsercache.pythoncache: |
239 | self.references = pythonparsecache[h]["refs"] | 162 | self.references = codeparsercache.pythoncache[h]["refs"] |
240 | self.execs = pythonparsecache[h]["execs"] | 163 | self.execs = codeparsercache.pythoncache[h]["execs"] |
241 | return | 164 | return |
242 | 165 | ||
243 | if h in pythonparsecacheextras: | 166 | if h in codeparsercache.pythoncacheextras: |
244 | self.references = pythonparsecacheextras[h]["refs"] | 167 | self.references = codeparsercache.pythoncacheextras[h]["refs"] |
245 | self.execs = pythonparsecacheextras[h]["execs"] | 168 | self.execs = codeparsercache.pythoncacheextras[h]["execs"] |
246 | return | 169 | return |
247 | 170 | ||
248 | 171 | ||
@@ -256,9 +179,9 @@ class PythonParser(): | |||
256 | self.references.update(self.var_references) | 179 | self.references.update(self.var_references) |
257 | self.references.update(self.var_execs) | 180 | self.references.update(self.var_execs) |
258 | 181 | ||
259 | pythonparsecacheextras[h] = {} | 182 | codeparsercache.pythoncacheextras[h] = {} |
260 | pythonparsecacheextras[h]["refs"] = self.references | 183 | codeparsercache.pythoncacheextras[h]["refs"] = self.references |
261 | pythonparsecacheextras[h]["execs"] = self.execs | 184 | codeparsercache.pythoncacheextras[h]["execs"] = self.execs |
262 | 185 | ||
263 | class ShellParser(): | 186 | class ShellParser(): |
264 | def __init__(self, name, log): | 187 | def __init__(self, name, log): |
@@ -276,12 +199,12 @@ class ShellParser(): | |||
276 | 199 | ||
277 | h = hash(str(value)) | 200 | h = hash(str(value)) |
278 | 201 | ||
279 | if h in shellparsecache: | 202 | if h in codeparsercache.shellcache: |
280 | self.execs = shellparsecache[h]["execs"] | 203 | self.execs = codeparsercache.shellcache[h]["execs"] |
281 | return self.execs | 204 | return self.execs |
282 | 205 | ||
283 | if h in shellparsecacheextras: | 206 | if h in codeparsercache.shellcacheextras: |
284 | self.execs = shellparsecacheextras[h]["execs"] | 207 | self.execs = codeparsercache.shellcacheextras[h]["execs"] |
285 | return self.execs | 208 | return self.execs |
286 | 209 | ||
287 | try: | 210 | try: |
@@ -293,8 +216,8 @@ class ShellParser(): | |||
293 | self.process_tokens(token) | 216 | self.process_tokens(token) |
294 | self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs) | 217 | self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs) |
295 | 218 | ||
296 | shellparsecacheextras[h] = {} | 219 | codeparsercache.shellcacheextras[h] = {} |
297 | shellparsecacheextras[h]["execs"] = self.execs | 220 | codeparsercache.shellcacheextras[h]["execs"] = self.execs |
298 | 221 | ||
299 | return self.execs | 222 | return self.execs |
300 | 223 | ||