diff options
author | Joshua Watt <jpewhacker@gmail.com> | 2019-01-04 10:20:15 -0600 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2019-01-08 11:16:44 +0000 |
commit | adc37721a86ce44c0223b7b03aabd7deceefe57d (patch) | |
tree | f917b9bdf3f9d5fa3c53bfce68cc947a44ebc1fa /meta/lib | |
parent | cbdfa376633d4cf2d86a0f6953d5b0e3a076e06d (diff) | |
download | poky-adc37721a86ce44c0223b7b03aabd7deceefe57d.tar.gz |
sstate: Implement hash equivalence sstate
Converts sstate so that it can use a hash equivalence server to
determine if a task really needs to be rebuilt, or if it can be restored
from a different (equivalent) sstate object.
The unique hashes are cached persistently using persist_data. This has
a number of advantages:
1) Unique hashes can be cached between invocations of bitbake to
prevent needing to contact the server every time (which is slow)
2) The value of each task's unique hash can easily be synchronized
between different threads, which will be useful if bitbake is
updated to do on the fly task re-hashing.
[YOCTO #13030]
(From OE-Core rev: d889acb4f8f06f09cece80fa12661725e6e5f037)
Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'meta/lib')
-rw-r--r-- | meta/lib/oe/sstatesig.py | 167 |
1 files changed, 167 insertions, 0 deletions
diff --git a/meta/lib/oe/sstatesig.py b/meta/lib/oe/sstatesig.py index 18c5a353a2..059e165c7a 100644 --- a/meta/lib/oe/sstatesig.py +++ b/meta/lib/oe/sstatesig.py | |||
@@ -263,10 +263,177 @@ class SignatureGeneratorOEBasicHash(bb.siggen.SignatureGeneratorBasicHash): | |||
263 | if error_msgs: | 263 | if error_msgs: |
264 | bb.fatal("\n".join(error_msgs)) | 264 | bb.fatal("\n".join(error_msgs)) |
265 | 265 | ||
266 | class SignatureGeneratorOEEquivHash(SignatureGeneratorOEBasicHash): | ||
267 | name = "OEEquivHash" | ||
268 | |||
269 | def init_rundepcheck(self, data): | ||
270 | super().init_rundepcheck(data) | ||
271 | self.server = data.getVar('SSTATE_HASHEQUIV_SERVER') | ||
272 | self.method = data.getVar('SSTATE_HASHEQUIV_METHOD') | ||
273 | self.unihashes = bb.persist_data.persist('SSTATESIG_UNIHASH_CACHE_v1_' + self.method, data) | ||
274 | |||
275 | def get_taskdata(self): | ||
276 | return (self.server, self.method) + super().get_taskdata() | ||
277 | |||
278 | def set_taskdata(self, data): | ||
279 | self.server, self.method = data[:2] | ||
280 | super().set_taskdata(data[2:]) | ||
281 | |||
282 | def __get_task_unihash_key(self, task): | ||
283 | # TODO: The key only *needs* to be the taskhash, the task is just | ||
284 | # convenient | ||
285 | return '%s:%s' % (task, self.taskhash[task]) | ||
286 | |||
287 | def get_stampfile_hash(self, task): | ||
288 | if task in self.taskhash: | ||
289 | # If a unique hash is reported, use it as the stampfile hash. This | ||
290 | # ensures that a task won't be re-run if the taskhash changes, | |
291 | # but it would result in the same output hash | ||
292 | unihash = self.unihashes.get(self.__get_task_unihash_key(task)) | ||
293 | if unihash is not None: | ||
294 | return unihash | ||
295 | |||
296 | return super().get_stampfile_hash(task) | ||
297 | |||
298 | def get_unihash(self, task): | ||
299 | import urllib | ||
300 | import json | ||
301 | |||
302 | taskhash = self.taskhash[task] | ||
303 | |||
304 | key = self.__get_task_unihash_key(task) | ||
305 | |||
306 | # TODO: This cache can grow unbounded. It probably only needs to keep | ||
307 | # for each task | ||
308 | unihash = self.unihashes.get(key) | ||
309 | if unihash is not None: | ||
310 | return unihash | ||
311 | |||
312 | # In the absence of being able to discover a unique hash from the | ||
313 | # server, make it be equivalent to the taskhash. The unique "hash" only | ||
314 | # really needs to be a unique string (not even necessarily a hash), but | ||
315 | # making it match the taskhash has a few advantages: | ||
316 | # | ||
317 | # 1) All of the sstate code that assumes hashes can stay the same | |
318 | # 2) It provides maximal compatibility with builders that don't use | ||
319 | # an equivalency server | ||
320 | # 3) It is easy for multiple independent builders to derive the | |
321 | # same unique hash from the same input. This means that if the | ||
322 | # independent builders find the same taskhash, but it isn't reported | ||
323 | # to the server, there is a better chance that they will agree on | ||
324 | # the unique hash. | ||
325 | unihash = taskhash | ||
326 | |||
327 | try: | ||
328 | url = '%s/v1/equivalent?%s' % (self.server, | ||
329 | urllib.parse.urlencode({'method': self.method, 'taskhash': self.taskhash[task]})) | ||
330 | |||
331 | request = urllib.request.Request(url) | ||
332 | response = urllib.request.urlopen(request) | ||
333 | data = response.read().decode('utf-8') | ||
334 | |||
335 | json_data = json.loads(data) | ||
336 | |||
337 | if json_data: | ||
338 | unihash = json_data['unihash'] | ||
339 | # A unique hash equal to the taskhash is not very interesting, | ||
340 | # so it is reported at debug level 2. If they differ, that | |
341 | # is much more interesting, so it is reported at debug level 1 | ||
342 | bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, task, self.server)) | ||
343 | else: | ||
344 | bb.debug(2, 'No reported unihash for %s:%s from %s' % (task, taskhash, self.server)) | ||
345 | except urllib.error.URLError as e: | ||
346 | bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) | ||
347 | except (KeyError, json.JSONDecodeError) as e: | ||
348 | bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e))) | ||
349 | |||
350 | self.unihashes[key] = unihash | ||
351 | return unihash | ||
352 | |||
353 | def report_unihash(self, path, task, d): | ||
354 | import urllib | ||
355 | import json | ||
356 | import tempfile | ||
357 | import base64 | ||
358 | |||
359 | taskhash = d.getVar('BB_TASKHASH') | ||
360 | unihash = d.getVar('BB_UNIHASH') | ||
361 | report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1' | ||
362 | tempdir = d.getVar('T') | ||
363 | fn = d.getVar('BB_FILENAME') | ||
364 | key = fn + '.do_' + task + ':' + taskhash | ||
365 | |||
366 | # Sanity checks | ||
367 | cache_unihash = self.unihashes.get(key) | ||
368 | if cache_unihash is None: | ||
369 | bb.fatal('%s not in unihash cache. Please report this error' % key) | ||
370 | |||
371 | if cache_unihash != unihash: | ||
372 | bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash)) | ||
373 | |||
374 | sigfile = None | ||
375 | sigfile_name = "depsig.do_%s.%d" % (task, os.getpid()) | ||
376 | sigfile_link = "depsig.do_%s" % task | ||
377 | |||
378 | try: | ||
379 | call = self.method + '(path, sigfile, task, d)' | ||
380 | sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b') | ||
381 | locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d} | ||
382 | |||
383 | outhash = bb.utils.better_eval(call, locs) | ||
384 | |||
385 | try: | ||
386 | url = '%s/v1/equivalent' % self.server | ||
387 | task_data = { | ||
388 | 'taskhash': taskhash, | ||
389 | 'method': self.method, | ||
390 | 'outhash': outhash, | ||
391 | 'unihash': unihash, | ||
392 | 'owner': d.getVar('SSTATE_HASHEQUIV_OWNER') | ||
393 | } | ||
394 | |||
395 | if report_taskdata: | ||
396 | sigfile.seek(0) | ||
397 | |||
398 | task_data['PN'] = d.getVar('PN') | ||
399 | task_data['PV'] = d.getVar('PV') | ||
400 | task_data['PR'] = d.getVar('PR') | ||
401 | task_data['task'] = task | ||
402 | task_data['outhash_siginfo'] = sigfile.read().decode('utf-8') | ||
403 | |||
404 | headers = {'content-type': 'application/json'} | ||
405 | |||
406 | request = urllib.request.Request(url, json.dumps(task_data).encode('utf-8'), headers) | ||
407 | response = urllib.request.urlopen(request) | ||
408 | data = response.read().decode('utf-8') | ||
409 | |||
410 | json_data = json.loads(data) | ||
411 | new_unihash = json_data['unihash'] | ||
412 | |||
413 | if new_unihash != unihash: | ||
414 | bb.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server)) | ||
415 | else: | ||
416 | bb.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server)) | ||
417 | except urllib.error.URLError as e: | ||
418 | bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) | ||
419 | except (KeyError, json.JSONDecodeError) as e: | ||
420 | bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e))) | ||
421 | finally: | ||
422 | if sigfile: | ||
423 | sigfile.close() | ||
424 | |||
425 | sigfile_link_path = os.path.join(tempdir, sigfile_link) | ||
426 | bb.utils.remove(sigfile_link_path) | ||
427 | |||
428 | try: | ||
429 | os.symlink(sigfile_name, sigfile_link_path) | ||
430 | except OSError: | ||
431 | pass | ||
266 | 432 | ||
267 | # Insert these classes into siggen's namespace so it can see and select them | 433 | # Insert these classes into siggen's namespace so it can see and select them |
268 | bb.siggen.SignatureGeneratorOEBasic = SignatureGeneratorOEBasic | 434 | bb.siggen.SignatureGeneratorOEBasic = SignatureGeneratorOEBasic |
269 | bb.siggen.SignatureGeneratorOEBasicHash = SignatureGeneratorOEBasicHash | 435 | bb.siggen.SignatureGeneratorOEBasicHash = SignatureGeneratorOEBasicHash |
436 | bb.siggen.SignatureGeneratorOEEquivHash = SignatureGeneratorOEEquivHash | ||
270 | 437 | ||
271 | 438 | ||
272 | def find_siginfo(pn, taskname, taskhashlist, d): | 439 | def find_siginfo(pn, taskname, taskhashlist, d): |