summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Eggleton <paul.eggleton@linux.intel.com>2012-05-23 00:23:32 +0100
committerRichard Purdie <richard.purdie@linuxfoundation.org>2012-05-23 11:35:04 +0100
commit8b8be74ed21b878b2fe30d5b76ff0648e6e48c18 (patch)
treef1facae6d6803b450185811ca08bf215d6963530
parentd7b818b51f3e6dded0c0885cdfed5a24cda3b428 (diff)
downloadpoky-8b8be74ed21b878b2fe30d5b76ff0648e6e48c18.tar.gz
bitbake: implement checksums for local files in SRC_URI
Gathers a list of paths to have checksums calculated at parse time, and processes these when calculating task hashes. Checksums are cached with the file's current mtime. Thus, changing any local file in SRC_URI will now cause the do_fetch taskhash to change, thus forcing a rebuild. This change adds very roughly about an 8% increase in parse time (a few seconds) and maybe a few seconds during runqueue generation, so a fairly moderate performance hit. Note that since paths are resolved at parse time, this will not force a rebuild when files are introduced which would cause that resolved path to be different - for example, where a machine-specific version of a file was added without otherwise changing the recipe. This will need to be handled in a future update. Code to hook this into the signature generator was courtesy of Richard Purdie <richard.purdie@linuxfoundation.org>. Implements [YOCTO #2044]. (Bitbake rev: c993b7c457f8b7776e8a5dff253bfa0724bc2cae) Signed-off-by: Paul Eggleton <paul.eggleton@linux.intel.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r--bitbake/lib/bb/cache.py13
-rw-r--r--bitbake/lib/bb/checksum.py90
-rw-r--r--bitbake/lib/bb/cooker.py2
-rw-r--r--bitbake/lib/bb/fetch2/__init__.py85
-rw-r--r--bitbake/lib/bb/siggen.py24
5 files changed, 211 insertions, 3 deletions
diff --git a/bitbake/lib/bb/cache.py b/bitbake/lib/bb/cache.py
index 36e6356f51..dea2a80616 100644
--- a/bitbake/lib/bb/cache.py
+++ b/bitbake/lib/bb/cache.py
@@ -43,7 +43,7 @@ except ImportError:
43 logger.info("Importing cPickle failed. " 43 logger.info("Importing cPickle failed. "
44 "Falling back to a very slow implementation.") 44 "Falling back to a very slow implementation.")
45 45
46__cache_version__ = "143" 46__cache_version__ = "144"
47 47
48def getCacheFile(path, filename, data_hash): 48def getCacheFile(path, filename, data_hash):
49 return os.path.join(path, filename + "." + data_hash) 49 return os.path.join(path, filename + "." + data_hash)
@@ -76,9 +76,13 @@ class RecipeInfoCommon(object):
76 for task in tasks) 76 for task in tasks)
77 77
78 @classmethod 78 @classmethod
79 def flaglist(cls, flag, varlist, metadata): 79 def flaglist(cls, flag, varlist, metadata, squash=False):
80 return dict((var, metadata.getVarFlag(var, flag, True)) 80 out_dict = dict((var, metadata.getVarFlag(var, flag, True))
81 for var in varlist) 81 for var in varlist)
82 if squash:
83 return dict((k,v) for (k,v) in out_dict.iteritems() if v)
84 else:
85 return out_dict
82 86
83 @classmethod 87 @classmethod
84 def getvar(cls, var, metadata): 88 def getvar(cls, var, metadata):
@@ -128,6 +132,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
128 self.stamp = self.getvar('STAMP', metadata) 132 self.stamp = self.getvar('STAMP', metadata)
129 self.stamp_base = self.flaglist('stamp-base', self.tasks, metadata) 133 self.stamp_base = self.flaglist('stamp-base', self.tasks, metadata)
130 self.stamp_extrainfo = self.flaglist('stamp-extra-info', self.tasks, metadata) 134 self.stamp_extrainfo = self.flaglist('stamp-extra-info', self.tasks, metadata)
135 self.file_checksums = self.flaglist('file-checksums', self.tasks, metadata, True)
131 self.packages_dynamic = self.listvar('PACKAGES_DYNAMIC', metadata) 136 self.packages_dynamic = self.listvar('PACKAGES_DYNAMIC', metadata)
132 self.depends = self.depvar('DEPENDS', metadata) 137 self.depends = self.depvar('DEPENDS', metadata)
133 self.provides = self.depvar('PROVIDES', metadata) 138 self.provides = self.depvar('PROVIDES', metadata)
@@ -154,6 +159,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
154 cachedata.stamp = {} 159 cachedata.stamp = {}
155 cachedata.stamp_base = {} 160 cachedata.stamp_base = {}
156 cachedata.stamp_extrainfo = {} 161 cachedata.stamp_extrainfo = {}
162 cachedata.file_checksums = {}
157 cachedata.fn_provides = {} 163 cachedata.fn_provides = {}
158 cachedata.pn_provides = defaultdict(list) 164 cachedata.pn_provides = defaultdict(list)
159 cachedata.all_depends = [] 165 cachedata.all_depends = []
@@ -185,6 +191,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
185 cachedata.stamp[fn] = self.stamp 191 cachedata.stamp[fn] = self.stamp
186 cachedata.stamp_base[fn] = self.stamp_base 192 cachedata.stamp_base[fn] = self.stamp_base
187 cachedata.stamp_extrainfo[fn] = self.stamp_extrainfo 193 cachedata.stamp_extrainfo[fn] = self.stamp_extrainfo
194 cachedata.file_checksums[fn] = self.file_checksums
188 195
189 provides = [self.pn] 196 provides = [self.pn]
190 for provide in self.provides: 197 for provide in self.provides:
diff --git a/bitbake/lib/bb/checksum.py b/bitbake/lib/bb/checksum.py
new file mode 100644
index 0000000000..514ff0b1e6
--- /dev/null
+++ b/bitbake/lib/bb/checksum.py
@@ -0,0 +1,90 @@
1# Local file checksum cache implementation
2#
3# Copyright (C) 2012 Intel Corporation
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License version 2 as
7# published by the Free Software Foundation.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License along
15# with this program; if not, write to the Free Software Foundation, Inc.,
16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17
18import os
19import stat
20import bb.utils
21import logging
22from bb.cache import MultiProcessCache
23
24logger = logging.getLogger("BitBake.Cache")
25
26try:
27 import cPickle as pickle
28except ImportError:
29 import pickle
30 logger.info("Importing cPickle failed. "
31 "Falling back to a very slow implementation.")
32
33
34# mtime cache (non-persistent)
35# based upon the assumption that files do not change during bitbake run
class FileMtimeCache(object):
    """In-memory lookup table from file path to modification time.

    Values are the integer ``stat.ST_MTIME`` field of ``os.stat()``.  A path
    is only stat'ed again when ``update_mtime()`` is called or after
    ``clear()``; otherwise the first value seen is returned.
    """

    # Declared on the class and only ever mutated in place, so every
    # instance reads and writes the same dictionary.
    cache = {}

    def cached_mtime(self, f):
        """Return the mtime of ``f``, stat'ing the file on first use only.

        Raises OSError if ``f`` is not cached yet and cannot be stat'ed.
        """
        known = self.cache.get(f)
        if known is None:
            known = os.stat(f)[stat.ST_MTIME]
            self.cache[f] = known
        return known

    def cached_mtime_noerror(self, f):
        """Like ``cached_mtime()`` but return 0 when ``f`` cannot be stat'ed.

        A failed lookup is not recorded, so the next call stats again.
        """
        if f in self.cache:
            return self.cache[f]
        try:
            fresh = os.stat(f)[stat.ST_MTIME]
        except OSError:
            return 0
        self.cache[f] = fresh
        return fresh

    def update_mtime(self, f):
        """Stat ``f`` unconditionally, store the new mtime and return it."""
        fresh = os.stat(f)[stat.ST_MTIME]
        self.cache[f] = fresh
        return fresh

    def clear(self):
        """Drop every cached mtime."""
        self.cache.clear()
58
59# Checksum + mtime cache (persistent)
class FileChecksumCache(MultiProcessCache):
    """Cache of local file checksums, validated against the file's mtime.

    Element 0 of ``self.cachedata`` and of ``self.cachedata_extras`` is a
    dictionary mapping a file path to a ``(mtime, checksum)`` tuple.  Both
    attributes, and the loading/saving of them, come from
    ``MultiProcessCache``.
    """
    cache_file_name = "local_file_checksum_cache.dat"
    CACHE_VERSION = 1

    def __init__(self):
        # The mtime helper is created before the base class initialiser runs.
        self.mtime_cache = FileMtimeCache()
        MultiProcessCache.__init__(self)

    def get_checksum(self, f):
        """Return the checksum of file ``f``, reusing the cached value if valid.

        The cached checksum is reused only when the mtime stored with it
        equals the file's current (cached) mtime.  Otherwise the checksum is
        recomputed with ``bb.utils.md5_file()`` and recorded in
        ``self.cachedata_extras``.

        Raises OSError if the mtime of ``f`` cannot be read.
        """
        entry = self.cachedata[0].get(f)
        cmtime = self.mtime_cache.cached_mtime(f)
        if entry:
            (mtime, hashval) = entry
            if cmtime == mtime:
                return hashval
            else:
                bb.debug(2, "file %s changed mtime, recompute checksum" % f)

        hashval = bb.utils.md5_file(f)
        self.cachedata_extras[0][f] = (cmtime, hashval)
        return hashval

    def merge_data(self, source, dest):
        """Merge the checksum entries of ``source`` into ``dest`` in place.

        Both arguments are containers whose element 0 is a dictionary of
        ``path -> (mtime, checksum)``.  A path missing from ``dest`` is
        copied over; a path present in both keeps whichever entry has the
        newer mtime (``dest`` wins on a tie).
        """
        incoming = source[0]
        merged = dest[0]
        for path in incoming:
            # Membership must be tested on the dictionary itself: testing the
            # outer container never matches a path, which would make the
            # mtime comparison below unreachable.
            if path in merged:
                (smtime, _) = incoming[path]
                (dmtime, _) = merged[path]
                if smtime > dmtime:
                    merged[path] = incoming[path]
            else:
                merged[path] = incoming[path]
diff --git a/bitbake/lib/bb/cooker.py b/bitbake/lib/bb/cooker.py
index dea0aadbee..8ad4922651 100644
--- a/bitbake/lib/bb/cooker.py
+++ b/bitbake/lib/bb/cooker.py
@@ -1570,6 +1570,7 @@ class CookerParser(object):
1570 def init(): 1570 def init():
1571 Parser.cfg = self.cfgdata 1571 Parser.cfg = self.cfgdata
1572 multiprocessing.util.Finalize(None, bb.codeparser.parser_cache_save, args=(self.cfgdata,), exitpriority=1) 1572 multiprocessing.util.Finalize(None, bb.codeparser.parser_cache_save, args=(self.cfgdata,), exitpriority=1)
1573 multiprocessing.util.Finalize(None, bb.fetch.fetcher_parse_save, args=(self.cfgdata,), exitpriority=1)
1573 1574
1574 self.feeder_quit = multiprocessing.Queue(maxsize=1) 1575 self.feeder_quit = multiprocessing.Queue(maxsize=1)
1575 self.parser_quit = multiprocessing.Queue(maxsize=self.num_processes) 1576 self.parser_quit = multiprocessing.Queue(maxsize=self.num_processes)
@@ -1618,6 +1619,7 @@ class CookerParser(object):
1618 sync.start() 1619 sync.start()
1619 multiprocessing.util.Finalize(None, sync.join, exitpriority=-100) 1620 multiprocessing.util.Finalize(None, sync.join, exitpriority=-100)
1620 bb.codeparser.parser_cache_savemerge(self.cooker.configuration.data) 1621 bb.codeparser.parser_cache_savemerge(self.cooker.configuration.data)
1622 bb.fetch.fetcher_parse_done(self.cooker.configuration.data)
1621 1623
1622 def load_cached(self): 1624 def load_cached(self):
1623 for filename, appends in self.fromcache: 1625 for filename, appends in self.fromcache:
diff --git a/bitbake/lib/bb/fetch2/__init__.py b/bitbake/lib/bb/fetch2/__init__.py
index 0b976c4079..d4b6c3ec39 100644
--- a/bitbake/lib/bb/fetch2/__init__.py
+++ b/bitbake/lib/bb/fetch2/__init__.py
@@ -8,6 +8,7 @@ BitBake build tools.
8""" 8"""
9 9
10# Copyright (C) 2003, 2004 Chris Larson 10# Copyright (C) 2003, 2004 Chris Larson
11# Copyright (C) 2012 Intel Corporation
11# 12#
12# This program is free software; you can redistribute it and/or modify 13# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License version 2 as 14# it under the terms of the GNU General Public License version 2 as
@@ -30,9 +31,11 @@ import os, re
30import logging 31import logging
31import urllib 32import urllib
32import bb.persist_data, bb.utils 33import bb.persist_data, bb.utils
34import bb.checksum
33from bb import data 35from bb import data
34 36
35__version__ = "2" 37__version__ = "2"
38_checksum_cache = bb.checksum.FileChecksumCache()
36 39
37logger = logging.getLogger("BitBake.Fetcher") 40logger = logging.getLogger("BitBake.Fetcher")
38 41
@@ -233,10 +236,18 @@ def fetcher_init(d):
233 else: 236 else:
234 raise FetchError("Invalid SRCREV cache policy of: %s" % srcrev_policy) 237 raise FetchError("Invalid SRCREV cache policy of: %s" % srcrev_policy)
235 238
239 _checksum_cache.init_cache(d)
240
236 for m in methods: 241 for m in methods:
237 if hasattr(m, "init"): 242 if hasattr(m, "init"):
238 m.init(d) 243 m.init(d)
239 244
def fetcher_parse_save(d):
    """Pass ``d`` to ``save_extras()`` on the module's local file checksum cache."""
    _checksum_cache.save_extras(d)
247
def fetcher_parse_done(d):
    """Pass ``d`` to ``save_merge()`` on the module's local file checksum cache."""
    _checksum_cache.save_merge(d)
250
240def fetcher_compare_revisions(d): 251def fetcher_compare_revisions(d):
241 """ 252 """
242 Compare the revisions in the persistant cache with current values and 253 Compare the revisions in the persistant cache with current values and
@@ -553,6 +564,80 @@ def srcrev_internal_helper(ud, d, name):
553 564
554 return rev 565 return rev
555 566
567
def get_checksum_file_list(d):
    """Return the resolved paths of the local files listed in SRC_URI.

    Every URL handled by the local fetcher is resolved to its local path.
    The paths are returned as a single space-separated string.

    A path that resolves under DL_DIR produces a warning: the file is still
    listed if it exists there, and is left out if it does not exist.
    """
    fetcher = Fetch([], d)
    download_dir = d.getVar('DL_DIR', True)

    resolved = []
    for url in fetcher.urls:
        urldata = fetcher.ud[url]
        if not isinstance(urldata.method, local.Local):
            # Only entries served by the local fetcher are checksummed.
            continue

        urldata.setup_localpath(d)
        path = urldata.localpath
        if path.startswith(download_dir):
            # The local fetcher's behaviour is to return a path under DL_DIR
            # if it couldn't find the file anywhere else.
            if not os.path.exists(path):
                bb.warn("Unable to get checksum for %s SRC_URI entry %s: file could not be found" % (d.getVar('PN', True), os.path.basename(path)))
                continue
            bb.warn("Getting checksum for %s SRC_URI entry %s: file not found except in DL_DIR" % (d.getVar('PN', True), os.path.basename(path)))
        resolved.append(path)

    return " ".join(resolved)
594
595
def get_file_checksums(filelist, pn):
    """Return the checksums of a whitespace-separated list of local paths.

    filelist -- string of paths separated by whitespace.  An entry that
                contains '*' is expanded with glob; an entry that is a
                directory contributes every file found beneath it; any other
                entry is treated as a single file.
    pn       -- recipe name, used only in warning messages.

    Returns a sorted list of (file path, checksum) tuples.  Checksums are
    obtained through the module's checksum cache.  A file whose checksum
    cannot be read (OSError) triggers a warning and is left out.

    Only real files appear in the result: the glob pattern or directory
    entry itself is never paired with a checksum.
    """
    import glob

    def checksum_file(f):
        # Best effort: an unreadable file is reported, not fatal.
        try:
            return _checksum_cache.get_checksum(f)
        except OSError as e:
            bb.warn("Unable to get checksum for %s SRC_URI entry %s: %s" % (pn, os.path.basename(f), e))
            return None

    def expand(pth):
        # Yield the concrete file paths that one filelist entry stands for.
        if '*' in pth:
            for match in glob.glob(pth):
                yield match
        elif os.path.isdir(pth):
            for root, dirs, files in os.walk(pth):
                for name in files:
                    yield os.path.join(root, name)
        else:
            yield pth

    checksums = []
    for pth in filelist.split():
        for f in expand(pth):
            checksum = checksum_file(f)
            if checksum:
                checksums.append((f, checksum))

    checksums.sort()
    return checksums
639
640
556class FetchData(object): 641class FetchData(object):
557 """ 642 """
558 A class which represents the fetcher state for a given URI. 643 A class which represents the fetcher state for a given URI.
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py
index 5a0b80e8a9..daf56770f9 100644
--- a/bitbake/lib/bb/siggen.py
+++ b/bitbake/lib/bb/siggen.py
@@ -60,6 +60,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
60 self.taskhash = {} 60 self.taskhash = {}
61 self.taskdeps = {} 61 self.taskdeps = {}
62 self.runtaskdeps = {} 62 self.runtaskdeps = {}
63 self.file_checksum_values = {}
63 self.gendeps = {} 64 self.gendeps = {}
64 self.lookupcache = {} 65 self.lookupcache = {}
65 self.pkgnameextract = re.compile("(?P<fn>.*)\..*") 66 self.pkgnameextract = re.compile("(?P<fn>.*)\..*")
@@ -152,6 +153,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
152 k = fn + "." + task 153 k = fn + "." + task
153 data = dataCache.basetaskhash[k] 154 data = dataCache.basetaskhash[k]
154 self.runtaskdeps[k] = [] 155 self.runtaskdeps[k] = []
156 self.file_checksum_values[k] = {}
155 recipename = dataCache.pkg_fn[fn] 157 recipename = dataCache.pkg_fn[fn]
156 for dep in sorted(deps, key=clean_basepath): 158 for dep in sorted(deps, key=clean_basepath):
157 depname = dataCache.pkg_fn[self.pkgnameextract.search(dep).group('fn')] 159 depname = dataCache.pkg_fn[self.pkgnameextract.search(dep).group('fn')]
@@ -161,6 +163,12 @@ class SignatureGeneratorBasic(SignatureGenerator):
161 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?", dep) 163 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?", dep)
162 data = data + self.taskhash[dep] 164 data = data + self.taskhash[dep]
163 self.runtaskdeps[k].append(dep) 165 self.runtaskdeps[k].append(dep)
166
167 if task in dataCache.file_checksums[fn]:
168 checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename)
169 for (f,cs) in checksums:
170 self.file_checksum_values[k][f] = cs
171 data = data + cs
164 h = hashlib.md5(data).hexdigest() 172 h = hashlib.md5(data).hexdigest()
165 self.taskhash[k] = h 173 self.taskhash[k] = h
166 #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task]) 174 #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
@@ -197,6 +205,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
197 205
198 if runtime and k in self.taskhash: 206 if runtime and k in self.taskhash:
199 data['runtaskdeps'] = self.runtaskdeps[k] 207 data['runtaskdeps'] = self.runtaskdeps[k]
208 data['file_checksum_values'] = self.file_checksum_values[k]
200 data['runtaskhashes'] = {} 209 data['runtaskhashes'] = {}
201 for dep in data['runtaskdeps']: 210 for dep in data['runtaskdeps']:
202 data['runtaskhashes'][dep] = self.taskhash[dep] 211 data['runtaskhashes'][dep] = self.taskhash[dep]
@@ -304,6 +313,18 @@ def compare_sigfiles(a, b):
304 for dep in changed: 313 for dep in changed:
305 print "Variable %s value changed from %s to %s" % (dep, a_data['varvals'][dep], b_data['varvals'][dep]) 314 print "Variable %s value changed from %s to %s" % (dep, a_data['varvals'][dep], b_data['varvals'][dep])
306 315
316 changed, added, removed = dict_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
317 if changed:
318 for f in changed:
319 print "Checksum for file %s changed from %s to %s" % (f, a_data['file_checksum_values'][f], b_data['file_checksum_values'][f])
320 if added:
321 for f in added:
322 print "Dependency on checksum of file %s was added" % (f)
323 if removed:
324 for f in removed:
325 print "Dependency on checksum of file %s was removed" % (f)
326
327
307 if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data: 328 if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
308 a = clean_basepaths(a_data['runtaskhashes']) 329 a = clean_basepaths(a_data['runtaskhashes'])
309 b = clean_basepaths(b_data['runtaskhashes']) 330 b = clean_basepaths(b_data['runtaskhashes'])
@@ -353,6 +374,9 @@ def dump_sigfile(a):
353 if 'runtaskdeps' in a_data: 374 if 'runtaskdeps' in a_data:
354 print "Tasks this task depends on: %s" % (a_data['runtaskdeps']) 375 print "Tasks this task depends on: %s" % (a_data['runtaskdeps'])
355 376
377 if 'file_checksum_values' in a_data:
378 print "This task depends on the checksums of files: %s" % (a_data['file_checksum_values'])
379
356 if 'runtaskhashes' in a_data: 380 if 'runtaskhashes' in a_data:
357 for dep in a_data['runtaskhashes']: 381 for dep in a_data['runtaskhashes']:
358 print "Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]) 382 print "Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep])