From 7d010055e2af3294e17db862f42664ca689a9356 Mon Sep 17 00:00:00 2001
From: Richard Purdie
Date: Thu, 17 Nov 2022 11:09:53 +0000
Subject: bitbake: cache: Allow compression of the data in SiggenRecipeInfo

The data in SiggenRecipeInfo is large and has a lot of duplication. The size
causes a few problems, impacting:
 - bitbake's overall memory usage
 - the amount of data sent over IPC between parsing processes and the server
 - the size of the cache files on disk
 - the size of "sigdata" hash information files on disk

The data consists of strings (some large) or frozenset lists of variables.

To reduce the impact we can:

a) deduplicate the data
b) pass references to the object on the second usage
   (e.g. over IPC or saving into pickle).

This patch does this for SiggenRecipeInfo mostly behind the scenes
but we do need a couple of reset points so that streamed data is written
correctly on the second usage.

(Bitbake rev: 9a2b13af483c20763d6559a823310954884f6ab1)

Signed-off-by: Richard Purdie
---
 bitbake/lib/bb/cache.py | 75 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

(limited to 'bitbake/lib/bb/cache.py')

diff --git a/bitbake/lib/bb/cache.py b/bitbake/lib/bb/cache.py
index 96ab069180..f5b527ba6a 100644
--- a/bitbake/lib/bb/cache.py
+++ b/bitbake/lib/bb/cache.py
@@ -263,6 +263,80 @@ class SiggenRecipeInfo(RecipeInfoCommon):
         cachedata.siggen_varvals[fn] = self.siggen_varvals
         cachedata.siggen_taskdeps[fn] = self.siggen_taskdeps
 
+    # The siggen variable data is large and impacts:
+    #  - bitbake's overall memory usage
+    #  - the amount of data sent over IPC between parsing processes and the server
+    #  - the size of the cache files on disk
+    #  - the size of "sigdata" hash information files on disk
+    # The data consists of strings (some large) or frozenset lists of variables
+    # As such, we a) deduplicate the data here and b) pass references to the object at second
+    # access (e.g. over IPC or saving into pickle).
+
+    store = {}  # values already seen by _restore(), reused so equal values share one object
+    save_map = {}  # value -> reference number handed out by _save()
+    save_count = 1  # next reference number; starts at 1 so a reference is always truthy
+    restore_map = {}  # sender pid -> {reference number -> value}
+    restore_count = {}  # sender pid -> next reference number expected from that sender
+
+    @classmethod
+    def reset(cls):
+        # Needs to be called before starting new streamed data in a given process
+        # (e.g. writing out the cache again)
+        cls.save_map = {}
+        cls.save_count = 1
+        cls.restore_map = {}
+        cls.restore_count = {}  # note: cls.store is not cleared here
+
+    @classmethod
+    def _save(cls, deps):  # dict -> list of (key, value, None) or (key, None, reference)
+        ret = []
+        if not deps:
+            return deps
+        for dep in deps:
+            fs = deps[dep]
+            if fs in cls.save_map:
+                ret.append((dep, None, cls.save_map[fs]))  # seen before: send only the reference
+            else:
+                cls.save_map[fs] = cls.save_count
+                ret.append((dep, fs, None))  # first use: send the value itself
+                cls.save_count += 1
+        return ret
+
+    @classmethod
+    def _restore(cls, deps, pid):  # inverse of _save(); pid is the 'pid' stored by __getstate__
+        ret = {}
+        if not deps:
+            return deps
+        if pid not in cls.restore_map:
+            cls.restore_map[pid] = {}
+            cls.restore_count[pid] = 1
+        refs = cls.restore_map[pid]
+        for dep, fs, mapnum in deps:  # must match the (dep, fs, mapnum) order _save() emits
+            if mapnum:
+                ret[dep] = refs[mapnum]
+            else:
+                try:
+                    fs = cls.store[fs]
+                except KeyError:
+                    cls.store[fs] = fs
+                refs[cls.restore_count[pid]] = fs  # number values in arrival order, mirroring _save()
+                cls.restore_count[pid] += 1
+                ret[dep] = fs
+        return ret
+
+    def __getstate__(self):
+        ret = {}
+        for key in ["siggen_gendeps", "siggen_taskdeps", "siggen_varvals"]:
+            ret[key] = self._save(self.__dict__[key])
+        ret['pid'] = os.getpid()  # lets _restore() keep a separate reference table per sender
+        return ret
+
+    def __setstate__(self, state):
+        pid = state['pid']
+        for key in ["siggen_gendeps", "siggen_taskdeps", "siggen_varvals"]:
+            setattr(self, key, self._restore(state[key], pid))
+
+
 def virtualfn2realfn(virtualfn):
     """
     Convert a virtual file name to a real one + the associated subclass keyword
@@ -621,6 +695,7 @@ class Cache(object):
                             p.dump(info)
 
         del self.depends_cache
+        SiggenRecipeInfo.reset()
 
     @staticmethod
     def mtime(cachefile):
-- 
cgit v1.2.3-54-g00ecf