summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Richard Purdie <richard.purdie@linuxfoundation.org> 2020-02-20 13:57:50 +0000
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2020-02-21 09:39:00 +0000
commit: 325bc7438ea6356472734b32337081edd577c98f (patch)
tree: 65b648ac71ba61e5a17f107b5f13b1c693a35fbb
parent: e663d093150926f6b46bb0682cf918fe4d9c460e (diff)
download: poky-325bc7438ea6356472734b32337081edd577c98f.tar.gz
oeqa/logparser: Fix performance issues with ptest log parsing
On the autobuilder, a ptest log with 2.1 million lines took around 18 hours to process. This is clearly crazy.

We can tweak the processing code to:
a) Stop repeatedly joining large strings together (append to a list instead)
b) Use one startswith() expression instead of multiple re.search() operations

With this change, the processing time drops from around 18 hours to around 12s.

[YOCTO #13696]

(From OE-Core rev: 01c9f40b979e36a53e789a6bedd89b6d9557dce3)

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- meta/lib/oeqa/utils/logparser.py 16
1 files changed, 14 insertions, 2 deletions
diff --git a/meta/lib/oeqa/utils/logparser.py b/meta/lib/oeqa/utils/logparser.py
index 7313df8ec3..5403721073 100644
--- a/meta/lib/oeqa/utils/logparser.py
+++ b/meta/lib/oeqa/utils/logparser.py
@@ -25,13 +25,20 @@ class PtestParser(object):
25 section_regex['exitcode'] = re.compile(r"^ERROR: Exit status is (.+)") 25 section_regex['exitcode'] = re.compile(r"^ERROR: Exit status is (.+)")
26 section_regex['timeout'] = re.compile(r"^TIMEOUT: .*/(.+)/ptest") 26 section_regex['timeout'] = re.compile(r"^TIMEOUT: .*/(.+)/ptest")
27 27
28 # Cache markers so we don't take the re.search() hit all the time.
29 markers = ("PASSED", "FAILED", "SKIPPED", "BEGIN:", "END:", "DURATION:", "ERROR: Exit", "TIMEOUT:")
30
28 def newsection(): 31 def newsection():
29 return { 'name': "No-section", 'log': "" } 32 return { 'name': "No-section", 'log': [] }
30 33
31 current_section = newsection() 34 current_section = newsection()
32 35
33 with open(logfile, errors='replace') as f: 36 with open(logfile, errors='replace') as f:
34 for line in f: 37 for line in f:
38 if not line.startswith(markers):
39 current_section['log'].append(line)
40 continue
41
35 result = section_regex['begin'].search(line) 42 result = section_regex['begin'].search(line)
36 if result: 43 if result:
37 current_section['name'] = result.group(1) 44 current_section['name'] = result.group(1)
@@ -61,7 +68,7 @@ class PtestParser(object):
61 current_section[t] = result.group(1) 68 current_section[t] = result.group(1)
62 continue 69 continue
63 70
64 current_section['log'] = current_section['log'] + line 71 current_section['log'].append(line)
65 72
66 for t in test_regex: 73 for t in test_regex:
67 result = test_regex[t].search(line) 74 result = test_regex[t].search(line)
@@ -70,6 +77,11 @@ class PtestParser(object):
70 self.results[current_section['name']] = {} 77 self.results[current_section['name']] = {}
71 self.results[current_section['name']][result.group(1).strip()] = t 78 self.results[current_section['name']][result.group(1).strip()] = t
72 79
80 # Python performance for repeatedly joining long strings is poor, do it all at once at the end.
81 # For 2.1 million lines in a log this reduces 18 hours to 12s.
82 for section in self.sections:
83 self.sections[section]['log'] = "".join(self.sections[section]['log'])
84
73 return self.results, self.sections 85 return self.results, self.sections
74 86
75 # Log the results as files. The file name is the section name and the contents are the tests in that section. 87 # Log the results as files. The file name is the section name and the contents are the tests in that section.