From ee176ddba8739301bea6ca990783773b2e3fbdcb Mon Sep 17 00:00:00 2001
From: Ross Burton
Date: Mon, 13 Aug 2018 19:02:25 +0100
Subject: bitbake: utils/md5_file: don't iterate line-by-line

Opening a file in binary mode and iterating it seems like the simple
solution but will still break on newlines, which for binary files isn't
really useful as the size of the chunks could be huge or tiny.

Instead, let's be a bit more clever: we'll be MD5ing lots of files, but
we don't want to fill up memory: use mmap() to open the file and read
the file in 8k blocks.

(Bitbake rev: 41e6161c8ce8cc90ebc93d72852673ae60fac923)

Signed-off-by: Ross Burton
Signed-off-by: Richard Purdie
---
 bitbake/lib/bb/utils.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'bitbake/lib')

diff --git a/bitbake/lib/bb/utils.py b/bitbake/lib/bb/utils.py
index 378e699e0c..2ff7e82222 100644
--- a/bitbake/lib/bb/utils.py
+++ b/bitbake/lib/bb/utils.py
@@ -524,12 +524,17 @@ def md5_file(filename):
     """
     Return the hex string representation of the MD5 checksum of filename.
     """
-    import hashlib
-    m = hashlib.md5()
+    import hashlib, mmap
 
     with open(filename, "rb") as f:
-        for line in f:
-            m.update(line)
+        m = hashlib.md5()
+        try:
+            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
+                for chunk in iter(lambda: mm.read(8192), b''):
+                    m.update(chunk)
+        except ValueError:
+            # You can't mmap() an empty file so silence this exception
+            pass
     return m.hexdigest()
 
 def sha256_file(filename):
-- 
cgit v1.2.3-54-g00ecf