summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Etienne Cordonnier <ecordonnier@snap.com> 2023-02-01 15:19:00 +0100
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2023-02-17 15:05:12 +0000
commit: 3a60944caf433fe0f69b6d61ddfcd4ff8133f3f7 (patch)
tree: 6608511c12d8de40082ed149890a492872b719af
parent: 68e6eceb98de3c082bb0eca44510d95236b83ac3 (diff)
download: poky-3a60944caf433fe0f69b6d61ddfcd4ff8133f3f7.tar.gz
bitbake: siggen: Fix inefficient string concatenation
As discussed in https://stackoverflow.com/a/4435752/1710392 , CPython has an optimization for statements in the form "a = a + b" or "a += b". It seems that this line does not get optimized, because it has the form a = a + b + c:

    data = data + "./" + f.split("/./")[1]

For that reason, it does a copy of data for each iteration, potentially copying megabytes of data for each iteration.

Changing this line causes SignatureGeneratorBasic::get_taskhash to take 0.06 seconds instead of 45 seconds on my test setup where SRC_URI points to a big directory.

Note that PEP8 explicitly recommends not to use this optimization, which is specific to CPython: "do not rely on CPython’s efficient implementation of in-place string concatenation for statements in the form a += b or a = a + b"

However, the PEP8 recommended form using "join()" also does not avoid the copy and takes 45 seconds in my test setup:

    data = ''.join((data, "./", f.split("/./")[1]))

I have changed the other lines to also use += for consistency only, however those were in the form a = a + b and were optimized already.

Co-authored-by: JJ Robertson <jrobertson@snap.com>
(Bitbake rev: 592ee222a1c6da42925fb56801f226884b6724ec)
Signed-off-by: Etienne Cordonnier <ecordonnier@snap.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
(cherry picked from commit 195750f2ca355e29d51219c58ecb2c1d83692717)
Signed-off-by: Steve Sakoman <steve@sakoman.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- bitbake/lib/bb/siggen.py | 10
1 files changed, 5 insertions, 5 deletions
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py
index 07bb529452..dd7039e5de 100644
--- a/bitbake/lib/bb/siggen.py
+++ b/bitbake/lib/bb/siggen.py
@@ -332,19 +332,19 @@ class SignatureGeneratorBasic(SignatureGenerator):
332 332
333 data = self.basehash[tid] 333 data = self.basehash[tid]
334 for dep in self.runtaskdeps[tid]: 334 for dep in self.runtaskdeps[tid]:
335 data = data + self.get_unihash(dep) 335 data += self.get_unihash(dep)
336 336
337 for (f, cs) in self.file_checksum_values[tid]: 337 for (f, cs) in self.file_checksum_values[tid]:
338 if cs: 338 if cs:
339 if "/./" in f: 339 if "/./" in f:
340 data = data + "./" + f.split("/./")[1] 340 data += "./" + f.split("/./")[1]
341 data = data + cs 341 data += cs
342 342
343 if tid in self.taints: 343 if tid in self.taints:
344 if self.taints[tid].startswith("nostamp:"): 344 if self.taints[tid].startswith("nostamp:"):
345 data = data + self.taints[tid][8:] 345 data += self.taints[tid][8:]
346 else: 346 else:
347 data = data + self.taints[tid] 347 data += self.taints[tid]
348 348
349 h = hashlib.sha256(data.encode("utf-8")).hexdigest() 349 h = hashlib.sha256(data.encode("utf-8")).hexdigest()
350 self.taskhash[tid] = h 350 self.taskhash[tid] = h