author     Joshua Watt <jpewhacker@gmail.com>                    2019-01-04 10:20:15 -0600
committer  Richard Purdie <richard.purdie@linuxfoundation.org>   2019-01-08 11:16:44 +0000
commit     adc37721a86ce44c0223b7b03aabd7deceefe57d (patch)
tree       f917b9bdf3f9d5fa3c53bfce68cc947a44ebc1fa /meta
parent     cbdfa376633d4cf2d86a0f6953d5b0e3a076e06d (diff)
download   poky-adc37721a86ce44c0223b7b03aabd7deceefe57d.tar.gz
sstate: Implement hash equivalence sstate
Converts sstate so that it can use a hash equivalence server to determine if a
task really needs to be rebuilt, or if it can be restored from a different
(equivalent) sstate object.

The unique hashes are cached persistently using persist_data. This has a
number of advantages:
 1) Unique hashes can be cached between invocations of bitbake to prevent
    needing to contact the server every time (which is slow)
 2) The value of each task's unique hash can easily be synchronized between
    different threads, which will be useful if bitbake is updated to do
    on-the-fly task re-hashing.

[YOCTO #13030]

(From OE-Core rev: d889acb4f8f06f09cece80fa12661725e6e5f037)

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
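For reference, enabling the feature amounts to selecting the new signature generator and pointing it at a server. A minimal local.conf sketch (the server address is the example from the variable documentation added below; BB_SIGNATURE_HANDLER is the standard BitBake mechanism for selecting a signature generator and is not itself touched by this commit):

    BB_SIGNATURE_HANDLER = "OEEquivHash"
    SSTATE_HASHEQUIV_METHOD = "OEOuthashBasic"
    SSTATE_HASHEQUIV_SERVER = "http://192.168.0.1:5000"
    # Optional; avoid on a public server since it exposes PN/PV/PR/task names
    SSTATE_HASHEQUIV_REPORT_TASKDATA = "1"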
Diffstat (limited to 'meta')
-rw-r--r--  meta/classes/sstate.bbclass   105
-rw-r--r--  meta/conf/bitbake.conf          4
-rw-r--r--  meta/lib/oe/sstatesig.py      167
3 files changed, 267 insertions, 9 deletions
diff --git a/meta/classes/sstate.bbclass b/meta/classes/sstate.bbclass
index 59ebc3ab5c..da0807d6e9 100644
--- a/meta/classes/sstate.bbclass
+++ b/meta/classes/sstate.bbclass
@@ -11,7 +11,7 @@ def generate_sstatefn(spec, hash, d):
 SSTATE_PKGARCH = "${PACKAGE_ARCH}"
 SSTATE_PKGSPEC = "sstate:${PN}:${PACKAGE_ARCH}${TARGET_VENDOR}-${TARGET_OS}:${PV}:${PR}:${SSTATE_PKGARCH}:${SSTATE_VERSION}:"
 SSTATE_SWSPEC = "sstate:${PN}::${PV}:${PR}::${SSTATE_VERSION}:"
-SSTATE_PKGNAME = "${SSTATE_EXTRAPATH}${@generate_sstatefn(d.getVar('SSTATE_PKGSPEC'), d.getVar('BB_TASKHASH'), d)}"
+SSTATE_PKGNAME = "${SSTATE_EXTRAPATH}${@generate_sstatefn(d.getVar('SSTATE_PKGSPEC'), d.getVar('BB_UNIHASH'), d)}"
 SSTATE_PKG = "${SSTATE_DIR}/${SSTATE_PKGNAME}"
 SSTATE_EXTRAPATH = ""
 SSTATE_EXTRAPATHWILDCARD = ""
@@ -82,6 +82,23 @@ SSTATE_SIG_PASSPHRASE ?= ""
 # Whether to verify the GnUPG signatures when extracting sstate archives
 SSTATE_VERIFY_SIG ?= "0"
 
+SSTATE_HASHEQUIV_METHOD ?= "OEOuthashBasic"
+SSTATE_HASHEQUIV_METHOD[doc] = "The function used to calculate the output hash \
+    for a task, which in turn is used to determine equivalency. \
+    "
+
+SSTATE_HASHEQUIV_SERVER ?= ""
+SSTATE_HASHEQUIV_SERVER[doc] = "The hash equivalence server. For example, \
+    'http://192.168.0.1:5000'. Do not include a trailing slash \
+    "
+
+SSTATE_HASHEQUIV_REPORT_TASKDATA ?= "0"
+SSTATE_HASHEQUIV_REPORT_TASKDATA[doc] = "Report additional useful data to the \
+    hash equivalency server, such as PN, PV, taskname, etc. This information \
+    is very useful for developers looking at task data, but may leak sensitive \
+    data if the equivalence server is public. \
+    "
+
 python () {
     if bb.data.inherits_class('native', d):
         d.setVar('SSTATE_PKGARCH', d.getVar('BUILD_ARCH', False))
@@ -640,7 +657,7 @@ def sstate_package(ss, d):
         return
 
     for f in (d.getVar('SSTATECREATEFUNCS') or '').split() + \
-        ['sstate_create_package', 'sstate_sign_package'] + \
+        ['sstate_report_unihash', 'sstate_create_package', 'sstate_sign_package'] + \
         (d.getVar('SSTATEPOSTCREATEFUNCS') or '').split():
         # All hooks should run in SSTATE_BUILDDIR.
         bb.build.exec_func(f, d, (sstatebuild,))
@@ -764,6 +781,73 @@ python sstate_sign_package () {
                              d.getVar('SSTATE_SIG_PASSPHRASE'), armor=False)
 }
 
+def OEOuthashBasic(path, sigfile, task, d):
+    import hashlib
+    import stat
+
+    def update_hash(s):
+        s = s.encode('utf-8')
+        h.update(s)
+        if sigfile:
+            sigfile.write(s)
+
+    h = hashlib.sha256()
+    prev_dir = os.getcwd()
+
+    try:
+        os.chdir(path)
+
+        update_hash("OEOuthashBasic\n")
+
+        # It is only currently useful to get equivalent hashes for things that
+        # can be restored from sstate. Since the sstate object is named using
+        # SSTATE_PKGSPEC and the task name, those should be included in the
+        # output hash calculation.
+        update_hash("SSTATE_PKGSPEC=%s\n" % d.getVar('SSTATE_PKGSPEC'))
+        update_hash("task=%s\n" % task)
+
+        for root, dirs, files in os.walk('.', topdown=True):
+            # Sort directories and files to ensure consistent ordering
+            dirs.sort()
+            files.sort()
+
+            for f in files:
+                path = os.path.join(root, f)
+                s = os.lstat(path)
+
+                # Hash file path
+                update_hash(path + '\n')
+
+                # Hash file mode
+                update_hash("\tmode=0x%x\n" % stat.S_IMODE(s.st_mode))
+                update_hash("\ttype=0x%x\n" % stat.S_IFMT(s.st_mode))
+
+                if stat.S_ISBLK(s.st_mode) or stat.S_ISCHR(s.st_mode):
+                    # Hash device major and minor
+                    update_hash("\tdev=%d,%d\n" % (os.major(s.st_rdev), os.minor(s.st_rdev)))
+                elif stat.S_ISLNK(s.st_mode):
+                    # Hash symbolic link
+                    update_hash("\tsymlink=%s\n" % os.readlink(path))
+                else:
+                    fh = hashlib.sha256()
+                    # Hash file contents
+                    with open(path, 'rb') as d:
+                        for chunk in iter(lambda: d.read(4096), b""):
+                            fh.update(chunk)
+                    update_hash("\tdigest=%s\n" % fh.hexdigest())
+    finally:
+        os.chdir(prev_dir)
+
+    return h.hexdigest()
+
+python sstate_report_unihash() {
+    report_unihash = getattr(bb.parse.siggen, 'report_unihash', None)
+
+    if report_unihash:
+        ss = sstate_state_fromvars(d)
+        report_unihash(os.getcwd(), ss['task'], d)
+}
+
 #
 # Shell function to decompress and prepare a package for installation
 # Will be run from within SSTATE_INSTDIR.
@@ -788,6 +872,11 @@ def sstate_checkhashes(sq_fn, sq_task, sq_hash, sq_hashfn, d, siginfo=False, *,
     if siginfo:
         extension = extension + ".siginfo"
 
+    def gethash(task):
+        if sq_unihash is not None:
+            return sq_unihash[task]
+        return sq_hash[task]
+
     def getpathcomponents(task, d):
         # Magic data from BB_HASHFILENAME
         splithashfn = sq_hashfn[task].split(" ")
@@ -810,7 +899,7 @@ def sstate_checkhashes(sq_fn, sq_task, sq_hash, sq_hashfn, d, siginfo=False, *,
 
         spec, extrapath, tname = getpathcomponents(task, d)
 
-        sstatefile = d.expand("${SSTATE_DIR}/" + extrapath + generate_sstatefn(spec, sq_hash[task], d) + "_" + tname + extension)
+        sstatefile = d.expand("${SSTATE_DIR}/" + extrapath + generate_sstatefn(spec, gethash(task), d) + "_" + tname + extension)
 
         if os.path.exists(sstatefile):
             bb.debug(2, "SState: Found valid sstate file %s" % sstatefile)
@@ -872,7 +961,7 @@ def sstate_checkhashes(sq_fn, sq_task, sq_hash, sq_hashfn, d, siginfo=False, *,
             if task in ret:
                 continue
             spec, extrapath, tname = getpathcomponents(task, d)
-            sstatefile = d.expand(extrapath + generate_sstatefn(spec, sq_hash[task], d) + "_" + tname + extension)
+            sstatefile = d.expand(extrapath + generate_sstatefn(spec, gethash(task), d) + "_" + tname + extension)
             tasklist.append((task, sstatefile))
 
         if tasklist:
@@ -898,12 +987,12 @@ def sstate_checkhashes(sq_fn, sq_task, sq_hash, sq_hashfn, d, siginfo=False, *,
     evdata = {'missed': [], 'found': []};
     for task in missed:
         spec, extrapath, tname = getpathcomponents(task, d)
-        sstatefile = d.expand(extrapath + generate_sstatefn(spec, sq_hash[task], d) + "_" + tname + ".tgz")
-        evdata['missed'].append( (sq_fn[task], sq_task[task], sq_hash[task], sstatefile ) )
+        sstatefile = d.expand(extrapath + generate_sstatefn(spec, gethash(task), d) + "_" + tname + ".tgz")
+        evdata['missed'].append( (sq_fn[task], sq_task[task], gethash(task), sstatefile ) )
     for task in ret:
         spec, extrapath, tname = getpathcomponents(task, d)
-        sstatefile = d.expand(extrapath + generate_sstatefn(spec, sq_hash[task], d) + "_" + tname + ".tgz")
-        evdata['found'].append( (sq_fn[task], sq_task[task], sq_hash[task], sstatefile ) )
+        sstatefile = d.expand(extrapath + generate_sstatefn(spec, gethash(task), d) + "_" + tname + ".tgz")
+        evdata['found'].append( (sq_fn[task], sq_task[task], gethash(task), sstatefile ) )
     bb.event.fire(bb.event.MetadataEvent("MissedSstate", evdata), d)
 
     # Print some summary statistics about the current task completion and how much sstate
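Note that SSTATE_HASHEQUIV_METHOD is resolved by name and evaluated by report_unihash as method(path, sigfile, task, d), so an alternative output-hash function only needs to keep that signature, write its intermediate text to sigfile when one is given, and return a hex digest string. A hypothetical sketch (OEOuthashManifestOnly is invented here for illustration and would have to be defined where sstate.bbclass functions are visible, e.g. alongside OEOuthashBasic):

    def OEOuthashManifestOnly(path, sigfile, task, d):
        # Hypothetical variant of OEOuthashBasic: hash only relative file
        # names and sizes, ignoring file contents.
        import hashlib
        import os

        h = hashlib.sha256()

        def update_hash(s):
            s = s.encode('utf-8')
            h.update(s)
            if sigfile:
                sigfile.write(s)

        update_hash("OEOuthashManifestOnly\n")
        update_hash("task=%s\n" % task)

        for root, dirs, files in os.walk(path, topdown=True):
            # Sort for a deterministic walk order, as OEOuthashBasic does
            dirs.sort()
            files.sort()
            for f in files:
                p = os.path.join(root, f)
                update_hash("%s\tsize=%d\n" % (os.path.relpath(p, path), os.lstat(p).st_size))

        return h.hexdigest()

It would then be selected in local.conf with SSTATE_HASHEQUIV_METHOD = "OEOuthashManifestOnly".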
diff --git a/meta/conf/bitbake.conf b/meta/conf/bitbake.conf
index 6480062354..e64ce6a6da 100644
--- a/meta/conf/bitbake.conf
+++ b/meta/conf/bitbake.conf
@@ -867,7 +867,9 @@ BB_HASHBASE_WHITELIST ?= "TMPDIR FILE PATH PWD BB_TASKHASH BBPATH BBSERVER DL_DI
     STAMPS_DIR PRSERV_DUMPDIR PRSERV_DUMPFILE PRSERV_LOCKDOWN PARALLEL_MAKE \
     CCACHE_DIR EXTERNAL_TOOLCHAIN CCACHE CCACHE_NOHASHDIR LICENSE_PATH SDKPKGSUFFIX \
     WARN_QA ERROR_QA WORKDIR STAMPCLEAN PKGDATA_DIR BUILD_ARCH SSTATE_PKGARCH \
-    BB_WORKERCONTEXT BB_LIMITEDDEPS extend_recipe_sysroot DEPLOY_DIR"
+    BB_WORKERCONTEXT BB_LIMITEDDEPS BB_UNIHASH extend_recipe_sysroot DEPLOY_DIR \
+    SSTATE_HASHEQUIV_METHOD SSTATE_HASHEQUIV_SERVER SSTATE_HASHEQUIV_REPORT_TASKDATA \
+    SSTATE_HASHEQUIV_OWNER"
 BB_HASHCONFIG_WHITELIST ?= "${BB_HASHBASE_WHITELIST} DATE TIME SSH_AGENT_PID \
     SSH_AUTH_SOCK PSEUDO_BUILD BB_ENV_EXTRAWHITE DISABLE_SANITY_CHECKS \
     PARALLEL_MAKE BB_NUMBER_THREADS BB_ORIGENV BB_INVALIDCONF BBINCLUDED \
diff --git a/meta/lib/oe/sstatesig.py b/meta/lib/oe/sstatesig.py
index 18c5a353a2..059e165c7a 100644
--- a/meta/lib/oe/sstatesig.py
+++ b/meta/lib/oe/sstatesig.py
@@ -263,10 +263,177 @@ class SignatureGeneratorOEBasicHash(bb.siggen.SignatureGeneratorBasicHash):
         if error_msgs:
             bb.fatal("\n".join(error_msgs))
 
+class SignatureGeneratorOEEquivHash(SignatureGeneratorOEBasicHash):
+    name = "OEEquivHash"
+
+    def init_rundepcheck(self, data):
+        super().init_rundepcheck(data)
+        self.server = data.getVar('SSTATE_HASHEQUIV_SERVER')
+        self.method = data.getVar('SSTATE_HASHEQUIV_METHOD')
+        self.unihashes = bb.persist_data.persist('SSTATESIG_UNIHASH_CACHE_v1_' + self.method, data)
+
+    def get_taskdata(self):
+        return (self.server, self.method) + super().get_taskdata()
+
+    def set_taskdata(self, data):
+        self.server, self.method = data[:2]
+        super().set_taskdata(data[2:])
+
+    def __get_task_unihash_key(self, task):
+        # TODO: The key only *needs* to be the taskhash, the task is just
+        # convenient
+        return '%s:%s' % (task, self.taskhash[task])
+
+    def get_stampfile_hash(self, task):
+        if task in self.taskhash:
+            # If a unique hash is reported, use it as the stampfile hash. This
+            # ensures that a task won't be re-run if its taskhash changes but
+            # it would still result in the same output hash
+            unihash = self.unihashes.get(self.__get_task_unihash_key(task))
+            if unihash is not None:
+                return unihash
+
+        return super().get_stampfile_hash(task)
+
+    def get_unihash(self, task):
+        import urllib
+        import json
+
+        taskhash = self.taskhash[task]
+
+        key = self.__get_task_unihash_key(task)
+
+        # TODO: This cache can grow unbounded. It probably only needs to keep
+        # for each task
+        unihash = self.unihashes.get(key)
+        if unihash is not None:
+            return unihash
+
+        # In the absence of being able to discover a unique hash from the
+        # server, make it be equivalent to the taskhash. The unique "hash" only
+        # really needs to be a unique string (not even necessarily a hash), but
+        # making it match the taskhash has a few advantages:
+        #
+        # 1) All of the sstate code that assumes hashes can be the same
+        # 2) It provides maximal compatibility with builders that don't use
+        #    an equivalency server
+        # 3) The value is easy for multiple independent builders to derive the
+        #    same unique hash from the same input. This means that if the
+        #    independent builders find the same taskhash, but it isn't reported
+        #    to the server, there is a better chance that they will agree on
+        #    the unique hash.
+        unihash = taskhash
+
+        try:
+            url = '%s/v1/equivalent?%s' % (self.server,
+                    urllib.parse.urlencode({'method': self.method, 'taskhash': self.taskhash[task]}))
+
+            request = urllib.request.Request(url)
+            response = urllib.request.urlopen(request)
+            data = response.read().decode('utf-8')
+
+            json_data = json.loads(data)
+
+            if json_data:
+                unihash = json_data['unihash']
+                # A unique hash equal to the taskhash is not very interesting,
+                # so it is reported at debug level 2. If they differ, that
+                # is much more interesting, so it is reported at debug level 1
+                bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, task, self.server))
+            else:
+                bb.debug(2, 'No reported unihash for %s:%s from %s' % (task, taskhash, self.server))
+        except urllib.error.URLError as e:
+            bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
+        except (KeyError, json.JSONDecodeError) as e:
+            bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))
+
+        self.unihashes[key] = unihash
+        return unihash
+
+    def report_unihash(self, path, task, d):
+        import urllib
+        import json
+        import tempfile
+        import base64
+
+        taskhash = d.getVar('BB_TASKHASH')
+        unihash = d.getVar('BB_UNIHASH')
+        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
+        tempdir = d.getVar('T')
+        fn = d.getVar('BB_FILENAME')
+        key = fn + '.do_' + task + ':' + taskhash
+
+        # Sanity checks
+        cache_unihash = self.unihashes.get(key)
+        if cache_unihash is None:
+            bb.fatal('%s not in unihash cache. Please report this error' % key)
+
+        if cache_unihash != unihash:
+            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))
+
+        sigfile = None
+        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
+        sigfile_link = "depsig.do_%s" % task
+
+        try:
+            call = self.method + '(path, sigfile, task, d)'
+            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')
+            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}
+
+            outhash = bb.utils.better_eval(call, locs)
+
+            try:
+                url = '%s/v1/equivalent' % self.server
+                task_data = {
+                    'taskhash': taskhash,
+                    'method': self.method,
+                    'outhash': outhash,
+                    'unihash': unihash,
+                    'owner': d.getVar('SSTATE_HASHEQUIV_OWNER')
+                    }
+
+                if report_taskdata:
+                    sigfile.seek(0)
+
+                    task_data['PN'] = d.getVar('PN')
+                    task_data['PV'] = d.getVar('PV')
+                    task_data['PR'] = d.getVar('PR')
+                    task_data['task'] = task
+                    task_data['outhash_siginfo'] = sigfile.read().decode('utf-8')
+
+                headers = {'content-type': 'application/json'}
+
+                request = urllib.request.Request(url, json.dumps(task_data).encode('utf-8'), headers)
+                response = urllib.request.urlopen(request)
+                data = response.read().decode('utf-8')
+
+                json_data = json.loads(data)
+                new_unihash = json_data['unihash']
+
+                if new_unihash != unihash:
+                    bb.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
+                else:
+                    bb.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
+            except urllib.error.URLError as e:
+                bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
+            except (KeyError, json.JSONDecodeError) as e:
+                bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))
+        finally:
+            if sigfile:
+                sigfile.close()
+
+                sigfile_link_path = os.path.join(tempdir, sigfile_link)
+                bb.utils.remove(sigfile_link_path)
+
+                try:
+                    os.symlink(sigfile_name, sigfile_link_path)
+                except OSError:
+                    pass
+
 
 # Insert these classes into siggen's namespace so it can see and select them
 bb.siggen.SignatureGeneratorOEBasic = SignatureGeneratorOEBasic
 bb.siggen.SignatureGeneratorOEBasicHash = SignatureGeneratorOEBasicHash
+bb.siggen.SignatureGeneratorOEEquivHash = SignatureGeneratorOEEquivHash
 
 
 def find_siginfo(pn, taskname, taskhashlist, d):
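For completeness, the client side of the protocol used above boils down to two HTTP exchanges: a GET to look up an equivalent unihash for a taskhash, and a POST to report a computed output hash. A standalone sketch, not part of the commit (the server address is an assumption; the endpoint and JSON fields come from the code added here):

    import json
    import urllib.parse
    import urllib.request

    SERVER = 'http://192.168.0.1:5000'   # assumed/example address
    METHOD = 'OEOuthashBasic'

    def query_unihash(taskhash):
        # Mirrors SignatureGeneratorOEEquivHash.get_unihash(): GET /v1/equivalent
        url = '%s/v1/equivalent?%s' % (SERVER,
                urllib.parse.urlencode({'method': METHOD, 'taskhash': taskhash}))
        with urllib.request.urlopen(url) as response:
            data = json.loads(response.read().decode('utf-8'))
        # An empty response means the server has no equivalent hash recorded yet
        return data['unihash'] if data else None

    def report_outhash(taskhash, outhash, unihash, owner=None):
        # Mirrors report_unihash(): POST /v1/equivalent with a JSON body; the
        # server replies with the unihash that equivalent tasks should share
        body = json.dumps({'taskhash': taskhash, 'method': METHOD,
                           'outhash': outhash, 'unihash': unihash,
                           'owner': owner}).encode('utf-8')
        request = urllib.request.Request('%s/v1/equivalent' % SERVER, body,
                                         {'content-type': 'application/json'})
        with urllib.request.urlopen(request) as response:
            return json.loads(response.read().decode('utf-8'))['unihash']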