Diffstat (limited to 'scripts/lib/resulttool')
-rw-r--r--  scripts/lib/resulttool/junit.py            |  77
-rw-r--r--  scripts/lib/resulttool/log.py              |  13
-rwxr-xr-x  scripts/lib/resulttool/manualexecution.py  |   2
-rw-r--r--  scripts/lib/resulttool/regression.py       | 284
-rw-r--r--  scripts/lib/resulttool/report.py           |   7
-rw-r--r--  scripts/lib/resulttool/resultutils.py      |  84
-rw-r--r--  scripts/lib/resulttool/store.py            |  27
7 files changed, 456 insertions, 38 deletions
diff --git a/scripts/lib/resulttool/junit.py b/scripts/lib/resulttool/junit.py
new file mode 100644
index 0000000000..c7a53dc550
--- /dev/null
+++ b/scripts/lib/resulttool/junit.py
@@ -0,0 +1,77 @@ | |||
1 | # resulttool - report test results in JUnit XML format | ||
2 | # | ||
3 | # Copyright (c) 2024, Siemens AG. | ||
4 | # | ||
5 | # SPDX-License-Identifier: GPL-2.0-only | ||
6 | # | ||
7 | |||
8 | import os | ||
9 | import re | ||
10 | import xml.etree.ElementTree as ET | ||
11 | import resulttool.resultutils as resultutils | ||
12 | |||
13 | def junit(args, logger): | ||
14 | testresults = resultutils.load_resultsdata(args.json_file, configmap=resultutils.store_map) | ||
15 | |||
16 | total_time = 0 | ||
17 | skipped = 0 | ||
18 | failures = 0 | ||
19 | errors = 0 | ||
20 | |||
21 | for tests in testresults.values(): | ||
22 | results = tests[next(reversed(tests))].get("result", {}) | ||
23 | |||
24 | for result_id, result in results.items(): | ||
25 | # filter out ptestresult.rawlogs and ptestresult.sections | ||
26 | if re.search(r'\.test_', result_id): | ||
27 | total_time += result.get("duration", 0) | ||
28 | |||
29 | if result['status'] == "FAILED": | ||
30 | failures += 1 | ||
31 | elif result['status'] == "ERROR": | ||
32 | errors += 1 | ||
33 | elif result['status'] == "SKIPPED": | ||
34 | skipped += 1 | ||
35 | |||
36 | testsuites_node = ET.Element("testsuites") | ||
37 | testsuites_node.set("time", "%s" % total_time) | ||
38 | testsuite_node = ET.SubElement(testsuites_node, "testsuite") | ||
39 | testsuite_node.set("name", "Testimage") | ||
40 | testsuite_node.set("time", "%s" % total_time) | ||
41 | testsuite_node.set("tests", "%s" % len(results)) | ||
42 | testsuite_node.set("failures", "%s" % failures) | ||
43 | testsuite_node.set("errors", "%s" % errors) | ||
44 | testsuite_node.set("skipped", "%s" % skipped) | ||
45 | |||
46 | for result_id, result in results.items(): | ||
47 | if re.search(r'\.test_', result_id): | ||
48 | testcase_node = ET.SubElement(testsuite_node, "testcase", { | ||
49 | "name": result_id, | ||
50 | "classname": "Testimage", | ||
51 | "time": str(result['duration']) | ||
52 | }) | ||
53 | if result['status'] == "SKIPPED": | ||
54 | ET.SubElement(testcase_node, "skipped", message=result['log']) | ||
55 | elif result['status'] == "FAILED": | ||
56 | ET.SubElement(testcase_node, "failure", message=result['log']) | ||
57 | elif result['status'] == "ERROR": | ||
58 | ET.SubElement(testcase_node, "error", message=result['log']) | ||
59 | |||
60 | tree = ET.ElementTree(testsuites_node) | ||
61 | |||
62 | if args.junit_xml_path is None: | ||
63 | args.junit_xml_path = os.environ['BUILDDIR'] + '/tmp/log/oeqa/junit.xml' | ||
64 | tree.write(args.junit_xml_path, encoding='UTF-8', xml_declaration=True) | ||
65 | |||
66 | logger.info('Saved JUnit XML report as %s' % args.junit_xml_path) | ||
67 | |||
68 | def register_commands(subparsers): | ||
69 | """Register subcommands from this plugin""" | ||
70 | parser_build = subparsers.add_parser('junit', help='create test report in JUnit XML format', | ||
71 | description='generate unit test report in JUnit XML format based on the latest test results in the testresults.json.', | ||
72 | group='analysis') | ||
73 | parser_build.set_defaults(func=junit) | ||
74 | parser_build.add_argument('json_file', | ||
75 | help='json file should point to the testresults.json') | ||
76 | parser_build.add_argument('-j', '--junit_xml_path', | ||
77 | help='junit xml path allows setting the path of the generated test report. The default location is <build_dir>/tmp/log/oeqa/junit.xml') | ||
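The junit plugin above is essentially a thin walk over the latest testresults.json entry feeding xml.etree.ElementTree. As a reference point, here is a minimal standalone sketch of that ElementTree pattern, using hypothetical result entries rather than resulttool's internal structures:

    import xml.etree.ElementTree as ET

    # Hypothetical result entries shaped like the ".test_" items the plugin keeps
    results = {
        "selftest.example.test_ok":  {"status": "PASSED", "duration": 0.5},
        "selftest.example.test_bad": {"status": "FAILED", "duration": 1.2, "log": "assertion failed"},
    }

    testsuites = ET.Element("testsuites")
    suite = ET.SubElement(testsuites, "testsuite", name="Testimage",
                          tests=str(len(results)),
                          failures=str(sum(r["status"] == "FAILED" for r in results.values())))
    for name, r in results.items():
        case = ET.SubElement(suite, "testcase", name=name, classname="Testimage",
                             time=str(r["duration"]))
        if r["status"] == "FAILED":
            ET.SubElement(case, "failure", message=r.get("log", ""))

    # xml_declaration=True gives the same '<?xml ...?>' header the plugin writes
    ET.ElementTree(testsuites).write("junit.xml", encoding="UTF-8", xml_declaration=True)

Invocation should then be along the lines of "resulttool junit path/to/testresults.json -j junit.xml", given the argument parser registered above.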
diff --git a/scripts/lib/resulttool/log.py b/scripts/lib/resulttool/log.py
index eb3927ec82..15148ca288 100644
--- a/scripts/lib/resulttool/log.py
+++ b/scripts/lib/resulttool/log.py
@@ -28,12 +28,10 @@ def show_reproducible(result, reproducible, logger): | |||
28 | def log(args, logger): | 28 | def log(args, logger): |
29 | results = resultutils.load_resultsdata(args.source) | 29 | results = resultutils.load_resultsdata(args.source) |
30 | 30 | ||
31 | ptest_count = sum(1 for _, _, _, r in resultutils.test_run_results(results) if 'ptestresult.sections' in r) | ||
32 | if ptest_count > 1 and not args.prepend_run: | ||
33 | print("%i ptest sections found. '--prepend-run' is required" % ptest_count) | ||
34 | return 1 | ||
35 | |||
36 | for _, run_name, _, r in resultutils.test_run_results(results): | 31 | for _, run_name, _, r in resultutils.test_run_results(results): |
32 | if args.list_ptest: | ||
33 | print('\n'.join(sorted(r['ptestresult.sections'].keys()))) | ||
34 | |||
37 | if args.dump_ptest: | 35 | if args.dump_ptest: |
38 | for sectname in ['ptestresult.sections', 'ltpposixresult.sections', 'ltpresult.sections']: | 36 | for sectname in ['ptestresult.sections', 'ltpposixresult.sections', 'ltpresult.sections']: |
39 | if sectname in r: | 37 | if sectname in r: |
@@ -48,6 +46,9 @@ def log(args, logger): | |||
48 | 46 | ||
49 | os.makedirs(dest_dir, exist_ok=True) | 47 | os.makedirs(dest_dir, exist_ok=True) |
50 | dest = os.path.join(dest_dir, '%s.log' % name) | 48 | dest = os.path.join(dest_dir, '%s.log' % name) |
49 | if os.path.exists(dest): | ||
50 | print("Overlapping ptest logs found, skipping %s. The '--prepend-run' option would avoid this" % name) | ||
51 | continue | ||
51 | print(dest) | 52 | print(dest) |
52 | with open(dest, 'w') as f: | 53 | with open(dest, 'w') as f: |
53 | f.write(logdata) | 54 | f.write(logdata) |
@@ -86,6 +87,8 @@ def register_commands(subparsers): | |||
86 | parser.set_defaults(func=log) | 87 | parser.set_defaults(func=log) |
87 | parser.add_argument('source', | 88 | parser.add_argument('source', |
88 | help='the results file/directory/URL to import') | 89 | help='the results file/directory/URL to import') |
90 | parser.add_argument('--list-ptest', action='store_true', | ||
91 | help='list the ptest test names') | ||
89 | parser.add_argument('--ptest', action='append', default=[], | 92 | parser.add_argument('--ptest', action='append', default=[], |
90 | help='show logs for a ptest') | 93 | help='show logs for a ptest') |
91 | parser.add_argument('--dump-ptest', metavar='DIR', | 94 | parser.add_argument('--dump-ptest', metavar='DIR', |
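Instead of refusing to dump anything when several ptest sections are present, the log command now skips individual collisions and suggests --prepend-run, and the new --list-ptest option prints the available section names (for example: resulttool log path/to/testresults.json --list-ptest). A small sketch of the per-file skip behaviour, written as a hypothetical helper rather than the resulttool code itself:

    import os

    def dump_ptest_logs(sections, dest_dir):
        # Write one <name>.log per ptest section, skipping any name already written
        os.makedirs(dest_dir, exist_ok=True)
        for name, data in sections.items():
            dest = os.path.join(dest_dir, '%s.log' % name)
            if os.path.exists(dest):
                print("Overlapping ptest logs found, skipping %s" % name)
                continue
            with open(dest, 'w') as f:
                f.write(data.get('log', ''))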
diff --git a/scripts/lib/resulttool/manualexecution.py b/scripts/lib/resulttool/manualexecution.py
index ecb27c5933..ae0861ac6b 100755
--- a/scripts/lib/resulttool/manualexecution.py
+++ b/scripts/lib/resulttool/manualexecution.py
@@ -22,7 +22,7 @@ def load_json_file(f): | |||
22 | def write_json_file(f, json_data): | 22 | def write_json_file(f, json_data): |
23 | os.makedirs(os.path.dirname(f), exist_ok=True) | 23 | os.makedirs(os.path.dirname(f), exist_ok=True) |
24 | with open(f, 'w') as filedata: | 24 | with open(f, 'w') as filedata: |
25 | filedata.write(json.dumps(json_data, sort_keys=True, indent=4)) | 25 | filedata.write(json.dumps(json_data, sort_keys=True, indent=1)) |
26 | 26 | ||
27 | class ManualTestRunner(object): | 27 | class ManualTestRunner(object): |
28 | 28 | ||
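The only functional change here is json.dumps moving from indent=4 to indent=1, which keeps the stored manual results line-oriented (so still diff-friendly) while making the files noticeably smaller. For example:

    import json

    data = {"result": {"manual.example.case": {"status": "PASSED"}}}
    # One space per nesting level instead of four; keys still sorted, one per line
    print(json.dumps(data, sort_keys=True, indent=1))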
diff --git a/scripts/lib/resulttool/regression.py b/scripts/lib/resulttool/regression.py
index 9f952951b3..33b3119c54 100644
--- a/scripts/lib/resulttool/regression.py
+++ b/scripts/lib/resulttool/regression.py
@@ -7,17 +7,213 @@ | |||
7 | # | 7 | # |
8 | 8 | ||
9 | import resulttool.resultutils as resultutils | 9 | import resulttool.resultutils as resultutils |
10 | import json | ||
11 | 10 | ||
12 | from oeqa.utils.git import GitRepo | 11 | from oeqa.utils.git import GitRepo |
13 | import oeqa.utils.gitarchive as gitarchive | 12 | import oeqa.utils.gitarchive as gitarchive |
14 | 13 | ||
15 | def compare_result(logger, base_name, target_name, base_result, target_result): | 14 | METADATA_MATCH_TABLE = { |
15 | "oeselftest": "OESELFTEST_METADATA" | ||
16 | } | ||
17 | |||
18 | OESELFTEST_METADATA_GUESS_TABLE={ | ||
19 | "trigger-build-posttrigger": { | ||
20 | "run_all_tests": False, | ||
21 | "run_tests":["buildoptions.SourceMirroring.test_yocto_source_mirror"], | ||
22 | "skips": None, | ||
23 | "machine": None, | ||
24 | "select_tags":None, | ||
25 | "exclude_tags": None | ||
26 | }, | ||
27 | "reproducible": { | ||
28 | "run_all_tests": False, | ||
29 | "run_tests":["reproducible"], | ||
30 | "skips": None, | ||
31 | "machine": None, | ||
32 | "select_tags":None, | ||
33 | "exclude_tags": None | ||
34 | }, | ||
35 | "arch-qemu-quick": { | ||
36 | "run_all_tests": True, | ||
37 | "run_tests":None, | ||
38 | "skips": None, | ||
39 | "machine": None, | ||
40 | "select_tags":["machine"], | ||
41 | "exclude_tags": None | ||
42 | }, | ||
43 | "arch-qemu-full-x86-or-x86_64": { | ||
44 | "run_all_tests": True, | ||
45 | "run_tests":None, | ||
46 | "skips": None, | ||
47 | "machine": None, | ||
48 | "select_tags":["machine", "toolchain-system"], | ||
49 | "exclude_tags": None | ||
50 | }, | ||
51 | "arch-qemu-full-others": { | ||
52 | "run_all_tests": True, | ||
53 | "run_tests":None, | ||
54 | "skips": None, | ||
55 | "machine": None, | ||
56 | "select_tags":["machine", "toolchain-user"], | ||
57 | "exclude_tags": None | ||
58 | }, | ||
59 | "selftest": { | ||
60 | "run_all_tests": True, | ||
61 | "run_tests":None, | ||
62 | "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"], | ||
63 | "machine": None, | ||
64 | "select_tags":None, | ||
65 | "exclude_tags": ["machine", "toolchain-system", "toolchain-user"] | ||
66 | }, | ||
67 | "bringup": { | ||
68 | "run_all_tests": True, | ||
69 | "run_tests":None, | ||
70 | "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"], | ||
71 | "machine": None, | ||
72 | "select_tags":None, | ||
73 | "exclude_tags": ["machine", "toolchain-system", "toolchain-user"] | ||
74 | } | ||
75 | } | ||
76 | |||
77 | STATUS_STRINGS = { | ||
78 | "None": "No matching test result" | ||
79 | } | ||
80 | |||
81 | REGRESSIONS_DISPLAY_LIMIT=50 | ||
82 | |||
83 | MISSING_TESTS_BANNER = "-------------------------- Missing tests --------------------------" | ||
84 | ADDITIONAL_DATA_BANNER = "--------------------- Matches and improvements --------------------" | ||
85 | |||
86 | def test_has_at_least_one_matching_tag(test, tag_list): | ||
87 | return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"]) | ||
88 | |||
89 | def all_tests_have_at_least_one_matching_tag(results, tag_list): | ||
90 | return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items()) | ||
91 | |||
92 | def any_test_have_any_matching_tag(results, tag_list): | ||
93 | return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values()) | ||
94 | |||
95 | def have_skipped_test(result, test_prefix): | ||
96 | return all( result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix)) | ||
97 | |||
98 | def have_all_tests_skipped(result, test_prefixes_list): | ||
99 | return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list) | ||
100 | |||
101 | def guess_oeselftest_metadata(results): | ||
102 | """ | ||
103 | When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content. | ||
104 | Check results for specific values (absence/presence of oetags, number and name of executed tests...), | ||
105 | and if it matches one of known configuration from autobuilder configuration, apply guessed OSELFTEST_METADATA | ||
106 | to it to allow proper test filtering. | ||
107 | This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less, | ||
108 | as new tests will have OESELFTEST_METADATA properly appended at test reporting time | ||
109 | """ | ||
110 | |||
111 | if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results: | ||
112 | return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger'] | ||
113 | elif all(result.startswith("reproducible") for result in results): | ||
114 | return OESELFTEST_METADATA_GUESS_TABLE['reproducible'] | ||
115 | elif all_tests_have_at_least_one_matching_tag(results, ["machine"]): | ||
116 | return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick'] | ||
117 | elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]): | ||
118 | return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64'] | ||
119 | elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]): | ||
120 | return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others'] | ||
121 | elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]): | ||
122 | if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]): | ||
123 | return OESELFTEST_METADATA_GUESS_TABLE['selftest'] | ||
124 | elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]): | ||
125 | return OESELFTEST_METADATA_GUESS_TABLE['bringup'] | ||
126 | |||
127 | return None | ||
128 | |||
129 | |||
130 | def metadata_matches(base_configuration, target_configuration): | ||
131 | """ | ||
132 | For passed base and target, check test type. If test type matches one of | ||
133 | properties described in METADATA_MATCH_TABLE, compare metadata if it is | ||
134 | present in base. Return true if metadata matches, or if base lacks some | ||
135 | data (either TEST_TYPE or the corresponding metadata) | ||
136 | """ | ||
137 | test_type = base_configuration.get('TEST_TYPE') | ||
138 | if test_type not in METADATA_MATCH_TABLE: | ||
139 | return True | ||
140 | |||
141 | metadata_key = METADATA_MATCH_TABLE.get(test_type) | ||
142 | if target_configuration.get(metadata_key) != base_configuration.get(metadata_key): | ||
143 | return False | ||
144 | |||
145 | return True | ||
146 | |||
147 | |||
148 | def machine_matches(base_configuration, target_configuration): | ||
149 | return base_configuration.get('MACHINE') == target_configuration.get('MACHINE') | ||
150 | |||
151 | |||
152 | def can_be_compared(logger, base, target): | ||
153 | """ | ||
154 | Some tests are not relevant to be compared, for example some oeselftest | ||
155 | run with different tests sets or parameters. Return true if tests can be | ||
156 | compared | ||
157 | """ | ||
158 | ret = True | ||
159 | base_configuration = base['configuration'] | ||
160 | target_configuration = target['configuration'] | ||
161 | |||
162 | # Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on tests results. | ||
163 | if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration: | ||
164 | guess = guess_oeselftest_metadata(base['result']) | ||
165 | if guess is None: | ||
166 | logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}") | ||
167 | else: | ||
168 | logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}") | ||
169 | base_configuration['OESELFTEST_METADATA'] = guess | ||
170 | if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration: | ||
171 | guess = guess_oeselftest_metadata(target['result']) | ||
172 | if guess is None: | ||
173 | logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}") | ||
174 | else: | ||
175 | logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}") | ||
176 | target_configuration['OESELFTEST_METADATA'] = guess | ||
177 | |||
178 | # Test runs with LTP results in should only be compared with other runs with LTP tests in them | ||
179 | if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']): | ||
180 | ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result']) | ||
181 | |||
182 | return ret and metadata_matches(base_configuration, target_configuration) \ | ||
183 | and machine_matches(base_configuration, target_configuration) | ||
184 | |||
185 | def get_status_str(raw_status): | ||
186 | raw_status_lower = raw_status.lower() if raw_status else "None" | ||
187 | return STATUS_STRINGS.get(raw_status_lower, raw_status) | ||
188 | |||
189 | def get_additional_info_line(new_pass_count, new_tests): | ||
190 | result=[] | ||
191 | if new_tests: | ||
192 | result.append(f'+{new_tests} test(s) present') | ||
193 | if new_pass_count: | ||
194 | result.append(f'+{new_pass_count} test(s) now passing') | ||
195 | |||
196 | if not result: | ||
197 | return "" | ||
198 | |||
199 | return ' -> ' + ', '.join(result) + '\n' | ||
200 | |||
201 | def compare_result(logger, base_name, target_name, base_result, target_result, display_limit=None): | ||
16 | base_result = base_result.get('result') | 202 | base_result = base_result.get('result') |
17 | target_result = target_result.get('result') | 203 | target_result = target_result.get('result') |
18 | result = {} | 204 | result = {} |
205 | new_tests = 0 | ||
206 | regressions = {} | ||
207 | resultstring = "" | ||
208 | new_tests = 0 | ||
209 | new_pass_count = 0 | ||
210 | |||
211 | display_limit = int(display_limit) if display_limit else REGRESSIONS_DISPLAY_LIMIT | ||
212 | |||
19 | if base_result and target_result: | 213 | if base_result and target_result: |
20 | for k in base_result: | 214 | for k in base_result: |
215 | if k in ['ptestresult.rawlogs', 'ptestresult.sections']: | ||
216 | continue | ||
21 | base_testcase = base_result[k] | 217 | base_testcase = base_result[k] |
22 | base_status = base_testcase.get('status') | 218 | base_status = base_testcase.get('status') |
23 | if base_status: | 219 | if base_status: |
@@ -27,12 +223,47 @@ def compare_result(logger, base_name, target_name, base_result, target_result): | |||
27 | result[k] = {'base': base_status, 'target': target_status} | 223 | result[k] = {'base': base_status, 'target': target_status} |
28 | else: | 224 | else: |
29 | logger.error('Failed to retrieved base test case status: %s' % k) | 225 | logger.error('Failed to retrieved base test case status: %s' % k) |
226 | |||
227 | # Also count new tests that were not present in base results: it | ||
228 | # could be newly added tests, but it could also highlights some tests | ||
229 | # renames or fixed faulty ptests | ||
230 | for k in target_result: | ||
231 | if k not in base_result: | ||
232 | new_tests += 1 | ||
30 | if result: | 233 | if result: |
31 | resultstring = "Regression: %s\n %s\n" % (base_name, target_name) | 234 | new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values()) |
32 | for k in sorted(result): | 235 | # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...) |
33 | resultstring += ' %s: %s -> %s\n' % (k, result[k]['base'], result[k]['target']) | 236 | if new_pass_count < len(result): |
237 | resultstring = "Regression: %s\n %s\n" % (base_name, target_name) | ||
238 | for k in sorted(result): | ||
239 | if not result[k]['target'] or not result[k]['target'].startswith("PASS"): | ||
240 | # Differentiate each ptest kind when listing regressions | ||
241 | key_parts = k.split('.') | ||
242 | key = '.'.join(key_parts[:2]) if k.startswith('ptest') else key_parts[0] | ||
243 | # Append new regression to corresponding test family | ||
244 | regressions[key] = regressions.setdefault(key, []) + [' %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))] | ||
245 | resultstring += f" Total: {sum([len(regressions[r]) for r in regressions])} new regression(s):\n" | ||
246 | for k in regressions: | ||
247 | resultstring += f" {len(regressions[k])} regression(s) for {k}\n" | ||
248 | count_to_print=min([display_limit, len(regressions[k])]) if display_limit > 0 else len(regressions[k]) | ||
249 | resultstring += ''.join(regressions[k][:count_to_print]) | ||
250 | if count_to_print < len(regressions[k]): | ||
251 | resultstring+=' [...]\n' | ||
252 | if new_pass_count > 0: | ||
253 | resultstring += f' Additionally, {new_pass_count} previously failing test(s) is/are now passing\n' | ||
254 | if new_tests > 0: | ||
255 | resultstring += f' Additionally, {new_tests} new test(s) is/are present\n' | ||
256 | else: | ||
257 | resultstring = "%s\n%s\n" % (base_name, target_name) | ||
258 | result = None | ||
34 | else: | 259 | else: |
35 | resultstring = "Match: %s\n %s" % (base_name, target_name) | 260 | resultstring = "%s\n%s\n" % (base_name, target_name) |
261 | |||
262 | if not result: | ||
263 | additional_info = get_additional_info_line(new_pass_count, new_tests) | ||
264 | if additional_info: | ||
265 | resultstring += additional_info | ||
266 | |||
36 | return result, resultstring | 267 | return result, resultstring |
37 | 268 | ||
38 | def get_results(logger, source): | 269 | def get_results(logger, source): |
@@ -44,12 +275,38 @@ def regression(args, logger): | |||
44 | 275 | ||
45 | regression_common(args, logger, base_results, target_results) | 276 | regression_common(args, logger, base_results, target_results) |
46 | 277 | ||
278 | # Some test case naming is poor and contains random strings, particularly lttng/babeltrace. | ||
279 | # Truncating the test names works since they contain file and line number identifiers | ||
280 | # which allows us to match them without the random components. | ||
281 | def fixup_ptest_names(results, logger): | ||
282 | for r in results: | ||
283 | for i in results[r]: | ||
284 | tests = list(results[r][i]['result'].keys()) | ||
285 | for test in tests: | ||
286 | new = None | ||
287 | if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test: | ||
288 | new = test.split("_-_")[0] | ||
289 | elif test.startswith(("ptestresult.curl.")) and "__" in test: | ||
290 | new = test.split("__")[0] | ||
291 | elif test.startswith(("ptestresult.dbus.")) and "__" in test: | ||
292 | new = test.split("__")[0] | ||
293 | elif test.startswith("ptestresult.binutils") and "build-st-" in test: | ||
294 | new = test.split(" ")[0] | ||
295 | elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test: | ||
296 | new = ".".join(test.split(".")[:2]) | ||
297 | if new: | ||
298 | results[r][i]['result'][new] = results[r][i]['result'][test] | ||
299 | del results[r][i]['result'][test] | ||
300 | |||
47 | def regression_common(args, logger, base_results, target_results): | 301 | def regression_common(args, logger, base_results, target_results): |
48 | if args.base_result_id: | 302 | if args.base_result_id: |
49 | base_results = resultutils.filter_resultsdata(base_results, args.base_result_id) | 303 | base_results = resultutils.filter_resultsdata(base_results, args.base_result_id) |
50 | if args.target_result_id: | 304 | if args.target_result_id: |
51 | target_results = resultutils.filter_resultsdata(target_results, args.target_result_id) | 305 | target_results = resultutils.filter_resultsdata(target_results, args.target_result_id) |
52 | 306 | ||
307 | fixup_ptest_names(base_results, logger) | ||
308 | fixup_ptest_names(target_results, logger) | ||
309 | |||
53 | matches = [] | 310 | matches = [] |
54 | regressions = [] | 311 | regressions = [] |
55 | notfound = [] | 312 | notfound = [] |
@@ -62,7 +319,9 @@ def regression_common(args, logger, base_results, target_results): | |||
62 | # removing any pairs which match | 319 | # removing any pairs which match |
63 | for c in base.copy(): | 320 | for c in base.copy(): |
64 | for b in target.copy(): | 321 | for b in target.copy(): |
65 | res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b]) | 322 | if not can_be_compared(logger, base_results[a][c], target_results[a][b]): |
323 | continue | ||
324 | res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit) | ||
66 | if not res: | 325 | if not res: |
67 | matches.append(resstr) | 326 | matches.append(resstr) |
68 | base.remove(c) | 327 | base.remove(c) |
@@ -71,15 +330,18 @@ def regression_common(args, logger, base_results, target_results): | |||
71 | # Should only now see regressions, we may not be able to match multiple pairs directly | 330 | # Should only now see regressions, we may not be able to match multiple pairs directly |
72 | for c in base: | 331 | for c in base: |
73 | for b in target: | 332 | for b in target: |
74 | res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b]) | 333 | if not can_be_compared(logger, base_results[a][c], target_results[a][b]): |
334 | continue | ||
335 | res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit) | ||
75 | if res: | 336 | if res: |
76 | regressions.append(resstr) | 337 | regressions.append(resstr) |
77 | else: | 338 | else: |
78 | notfound.append("%s not found in target" % a) | 339 | notfound.append("%s not found in target" % a) |
79 | print("\n".join(sorted(matches))) | ||
80 | print("\n".join(sorted(regressions))) | 340 | print("\n".join(sorted(regressions))) |
341 | print("\n" + MISSING_TESTS_BANNER + "\n") | ||
81 | print("\n".join(sorted(notfound))) | 342 | print("\n".join(sorted(notfound))) |
82 | 343 | print("\n" + ADDITIONAL_DATA_BANNER + "\n") | |
344 | print("\n".join(sorted(matches))) | ||
83 | return 0 | 345 | return 0 |
84 | 346 | ||
85 | def regression_git(args, logger): | 347 | def regression_git(args, logger): |
@@ -162,6 +424,7 @@ def register_commands(subparsers): | |||
162 | help='(optional) filter the base results to this result ID') | 424 | help='(optional) filter the base results to this result ID') |
163 | parser_build.add_argument('-t', '--target-result-id', default='', | 425 | parser_build.add_argument('-t', '--target-result-id', default='', |
164 | help='(optional) filter the target results to this result ID') | 426 | help='(optional) filter the target results to this result ID') |
427 | parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes") | ||
165 | 428 | ||
166 | parser_build = subparsers.add_parser('regression-git', help='regression git analysis', | 429 | parser_build = subparsers.add_parser('regression-git', help='regression git analysis', |
167 | description='regression analysis comparing base result set to target ' | 430 | description='regression analysis comparing base result set to target ' |
@@ -183,4 +446,5 @@ def register_commands(subparsers): | |||
183 | parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified") | 446 | parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified") |
184 | parser_build.add_argument('--commit2', help="Revision to compare with") | 447 | parser_build.add_argument('--commit2', help="Revision to compare with") |
185 | parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified") | 448 | parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified") |
449 | parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes") | ||
186 | 450 | ||
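Much of the comparison gating above comes down to metadata_matches(): runs whose TEST_TYPE appears in METADATA_MATCH_TABLE are only compared when the corresponding metadata block is identical. A condensed sketch of that check, reduced to plain dictionaries for illustration:

    METADATA_MATCH_TABLE = {"oeselftest": "OESELFTEST_METADATA"}

    def metadata_matches(base_configuration, target_configuration):
        # Only test types listed in the table carry metadata worth gating on
        key = METADATA_MATCH_TABLE.get(base_configuration.get('TEST_TYPE'))
        return key is None or base_configuration.get(key) == target_configuration.get(key)

    base   = {'TEST_TYPE': 'oeselftest', 'OESELFTEST_METADATA': {'run_all_tests': True}}
    target = {'TEST_TYPE': 'oeselftest', 'OESELFTEST_METADATA': {'run_all_tests': False}}
    assert not metadata_matches(base, target)   # differently configured selftest runs are not compared

The new -l/--limit option caps how many regressions are printed per test family (default 50, 0 for unlimited).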
diff --git a/scripts/lib/resulttool/report.py b/scripts/lib/resulttool/report.py
index f0ca50ebe2..1c100b00ab 100644
--- a/scripts/lib/resulttool/report.py
+++ b/scripts/lib/resulttool/report.py
@@ -176,7 +176,10 @@ class ResultsTextReport(object): | |||
176 | vals['sort'] = line['testseries'] + "_" + line['result_id'] | 176 | vals['sort'] = line['testseries'] + "_" + line['result_id'] |
177 | vals['failed_testcases'] = line['failed_testcases'] | 177 | vals['failed_testcases'] = line['failed_testcases'] |
178 | for k in cols: | 178 | for k in cols: |
179 | vals[k] = "%d (%s%%)" % (line[k], format(line[k] / total_tested * 100, '.0f')) | 179 | if total_tested: |
180 | vals[k] = "%d (%s%%)" % (line[k], format(line[k] / total_tested * 100, '.0f')) | ||
181 | else: | ||
182 | vals[k] = "0 (0%)" | ||
180 | for k in maxlen: | 183 | for k in maxlen: |
181 | if k in vals and len(vals[k]) > maxlen[k]: | 184 | if k in vals and len(vals[k]) > maxlen[k]: |
182 | maxlen[k] = len(vals[k]) | 185 | maxlen[k] = len(vals[k]) |
@@ -253,7 +256,7 @@ class ResultsTextReport(object): | |||
253 | if selected_test_case_only: | 256 | if selected_test_case_only: |
254 | print_selected_testcase_result(raw_results, selected_test_case_only) | 257 | print_selected_testcase_result(raw_results, selected_test_case_only) |
255 | else: | 258 | else: |
256 | print(json.dumps(raw_results, sort_keys=True, indent=4)) | 259 | print(json.dumps(raw_results, sort_keys=True, indent=1)) |
257 | else: | 260 | else: |
258 | print('Could not find raw test result for %s' % raw_test) | 261 | print('Could not find raw test result for %s' % raw_test) |
259 | return 0 | 262 | return 0 |
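The report change simply guards the percentage column against a zero total_tested, which would otherwise raise ZeroDivisionError on an empty result set. The guarded formatting boils down to something like:

    def format_count(count, total_tested):
        # "N (P%)", falling back to "0 (0%)" when nothing was tested at all
        if total_tested:
            return "%d (%s%%)" % (count, format(count / total_tested * 100, '.0f'))
        return "0 (0%)"

The raw-results dump also switches to indent=1, matching the other json.dumps changes in this diff.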
diff --git a/scripts/lib/resulttool/resultutils.py b/scripts/lib/resulttool/resultutils.py
index 8917022d36..b8fc79a6ac 100644
--- a/scripts/lib/resulttool/resultutils.py
+++ b/scripts/lib/resulttool/resultutils.py
@@ -14,8 +14,11 @@ import scriptpath | |||
14 | import copy | 14 | import copy |
15 | import urllib.request | 15 | import urllib.request |
16 | import posixpath | 16 | import posixpath |
17 | import logging | ||
17 | scriptpath.add_oe_lib_path() | 18 | scriptpath.add_oe_lib_path() |
18 | 19 | ||
20 | logger = logging.getLogger('resulttool') | ||
21 | |||
19 | flatten_map = { | 22 | flatten_map = { |
20 | "oeselftest": [], | 23 | "oeselftest": [], |
21 | "runtime": [], | 24 | "runtime": [], |
@@ -31,13 +34,19 @@ regression_map = { | |||
31 | "manual": ['TEST_TYPE', 'TEST_MODULE', 'IMAGE_BASENAME', 'MACHINE'] | 34 | "manual": ['TEST_TYPE', 'TEST_MODULE', 'IMAGE_BASENAME', 'MACHINE'] |
32 | } | 35 | } |
33 | store_map = { | 36 | store_map = { |
34 | "oeselftest": ['TEST_TYPE'], | 37 | "oeselftest": ['TEST_TYPE', 'TESTSERIES', 'MACHINE'], |
35 | "runtime": ['TEST_TYPE', 'DISTRO', 'MACHINE', 'IMAGE_BASENAME'], | 38 | "runtime": ['TEST_TYPE', 'DISTRO', 'MACHINE', 'IMAGE_BASENAME'], |
36 | "sdk": ['TEST_TYPE', 'MACHINE', 'SDKMACHINE', 'IMAGE_BASENAME'], | 39 | "sdk": ['TEST_TYPE', 'MACHINE', 'SDKMACHINE', 'IMAGE_BASENAME'], |
37 | "sdkext": ['TEST_TYPE', 'MACHINE', 'SDKMACHINE', 'IMAGE_BASENAME'], | 40 | "sdkext": ['TEST_TYPE', 'MACHINE', 'SDKMACHINE', 'IMAGE_BASENAME'], |
38 | "manual": ['TEST_TYPE', 'TEST_MODULE', 'MACHINE', 'IMAGE_BASENAME'] | 41 | "manual": ['TEST_TYPE', 'TEST_MODULE', 'MACHINE', 'IMAGE_BASENAME'] |
39 | } | 42 | } |
40 | 43 | ||
44 | rawlog_sections = { | ||
45 | "ptestresult.rawlogs": "ptest", | ||
46 | "ltpresult.rawlogs": "ltp", | ||
47 | "ltpposixresult.rawlogs": "ltpposix" | ||
48 | } | ||
49 | |||
41 | def is_url(p): | 50 | def is_url(p): |
42 | """ | 51 | """ |
43 | Helper for determining if the given path is a URL | 52 | Helper for determining if the given path is a URL |
@@ -58,7 +67,11 @@ def append_resultsdata(results, f, configmap=store_map, configvars=extra_configv | |||
58 | testseries = posixpath.basename(posixpath.dirname(url.path)) | 67 | testseries = posixpath.basename(posixpath.dirname(url.path)) |
59 | else: | 68 | else: |
60 | with open(f, "r") as filedata: | 69 | with open(f, "r") as filedata: |
61 | data = json.load(filedata) | 70 | try: |
71 | data = json.load(filedata) | ||
72 | except json.decoder.JSONDecodeError: | ||
73 | print("Cannot decode {}. Possible corruption. Skipping.".format(f)) | ||
74 | data = "" | ||
62 | testseries = os.path.basename(os.path.dirname(f)) | 75 | testseries = os.path.basename(os.path.dirname(f)) |
63 | else: | 76 | else: |
64 | data = f | 77 | data = f |
@@ -104,21 +117,57 @@ def filter_resultsdata(results, resultid): | |||
104 | newresults[r][i] = results[r][i] | 117 | newresults[r][i] = results[r][i] |
105 | return newresults | 118 | return newresults |
106 | 119 | ||
107 | def strip_ptestresults(results): | 120 | def strip_logs(results): |
108 | newresults = copy.deepcopy(results) | 121 | newresults = copy.deepcopy(results) |
109 | #for a in newresults2: | ||
110 | # newresults = newresults2[a] | ||
111 | for res in newresults: | 122 | for res in newresults: |
112 | if 'result' not in newresults[res]: | 123 | if 'result' not in newresults[res]: |
113 | continue | 124 | continue |
114 | if 'ptestresult.rawlogs' in newresults[res]['result']: | 125 | for logtype in rawlog_sections: |
115 | del newresults[res]['result']['ptestresult.rawlogs'] | 126 | if logtype in newresults[res]['result']: |
127 | del newresults[res]['result'][logtype] | ||
116 | if 'ptestresult.sections' in newresults[res]['result']: | 128 | if 'ptestresult.sections' in newresults[res]['result']: |
117 | for i in newresults[res]['result']['ptestresult.sections']: | 129 | for i in newresults[res]['result']['ptestresult.sections']: |
118 | if 'log' in newresults[res]['result']['ptestresult.sections'][i]: | 130 | if 'log' in newresults[res]['result']['ptestresult.sections'][i]: |
119 | del newresults[res]['result']['ptestresult.sections'][i]['log'] | 131 | del newresults[res]['result']['ptestresult.sections'][i]['log'] |
120 | return newresults | 132 | return newresults |
121 | 133 | ||
134 | # For timing numbers, crazy amounts of precision don't make sense and just confuse | ||
135 | # the logs. For numbers over 1, trim to 3 decimal places, for numbers less than 1, | ||
136 | # trim to 4 significant digits | ||
137 | def trim_durations(results): | ||
138 | for res in results: | ||
139 | if 'result' not in results[res]: | ||
140 | continue | ||
141 | for entry in results[res]['result']: | ||
142 | if 'duration' in results[res]['result'][entry]: | ||
143 | duration = results[res]['result'][entry]['duration'] | ||
144 | if duration > 1: | ||
145 | results[res]['result'][entry]['duration'] = float("%.3f" % duration) | ||
146 | elif duration < 1: | ||
147 | results[res]['result'][entry]['duration'] = float("%.4g" % duration) | ||
148 | return results | ||
149 | |||
150 | def handle_cleanups(results): | ||
151 | # Remove pointless path duplication from old format reproducibility results | ||
152 | for res2 in results: | ||
153 | try: | ||
154 | section = results[res2]['result']['reproducible']['files'] | ||
155 | for pkgtype in section: | ||
156 | for filelist in section[pkgtype].copy(): | ||
157 | if section[pkgtype][filelist] and type(section[pkgtype][filelist][0]) == dict: | ||
158 | newlist = [] | ||
159 | for entry in section[pkgtype][filelist]: | ||
160 | newlist.append(entry["reference"].split("/./")[1]) | ||
161 | section[pkgtype][filelist] = newlist | ||
162 | |||
163 | except KeyError: | ||
164 | pass | ||
165 | # Remove pointless duplicate rawlogs data | ||
166 | try: | ||
167 | del results[res2]['result']['reproducible.rawlogs'] | ||
168 | except KeyError: | ||
169 | pass | ||
170 | |||
122 | def decode_log(logdata): | 171 | def decode_log(logdata): |
123 | if isinstance(logdata, str): | 172 | if isinstance(logdata, str): |
124 | return logdata | 173 | return logdata |
@@ -142,7 +191,7 @@ def generic_get_log(sectionname, results, section): | |||
142 | return decode_log(ptest['log']) | 191 | return decode_log(ptest['log']) |
143 | 192 | ||
144 | def ptestresult_get_log(results, section): | 193 | def ptestresult_get_log(results, section): |
145 | return generic_get_log('ptestresuls.sections', results, section) | 194 | return generic_get_log('ptestresult.sections', results, section) |
146 | 195 | ||
147 | def generic_get_rawlogs(sectname, results): | 196 | def generic_get_rawlogs(sectname, results): |
148 | if sectname not in results: | 197 | if sectname not in results: |
@@ -151,9 +200,6 @@ def generic_get_rawlogs(sectname, results): | |||
151 | return None | 200 | return None |
152 | return decode_log(results[sectname]['log']) | 201 | return decode_log(results[sectname]['log']) |
153 | 202 | ||
154 | def ptestresult_get_rawlogs(results): | ||
155 | return generic_get_rawlogs('ptestresult.rawlogs', results) | ||
156 | |||
157 | def save_resultsdata(results, destdir, fn="testresults.json", ptestjson=False, ptestlogs=False): | 203 | def save_resultsdata(results, destdir, fn="testresults.json", ptestjson=False, ptestlogs=False): |
158 | for res in results: | 204 | for res in results: |
159 | if res: | 205 | if res: |
@@ -163,16 +209,20 @@ def save_resultsdata(results, destdir, fn="testresults.json", ptestjson=False, p | |||
163 | os.makedirs(os.path.dirname(dst), exist_ok=True) | 209 | os.makedirs(os.path.dirname(dst), exist_ok=True) |
164 | resultsout = results[res] | 210 | resultsout = results[res] |
165 | if not ptestjson: | 211 | if not ptestjson: |
166 | resultsout = strip_ptestresults(results[res]) | 212 | resultsout = strip_logs(results[res]) |
213 | trim_durations(resultsout) | ||
214 | handle_cleanups(resultsout) | ||
167 | with open(dst, 'w') as f: | 215 | with open(dst, 'w') as f: |
168 | f.write(json.dumps(resultsout, sort_keys=True, indent=4)) | 216 | f.write(json.dumps(resultsout, sort_keys=True, indent=1)) |
169 | for res2 in results[res]: | 217 | for res2 in results[res]: |
170 | if ptestlogs and 'result' in results[res][res2]: | 218 | if ptestlogs and 'result' in results[res][res2]: |
171 | seriesresults = results[res][res2]['result'] | 219 | seriesresults = results[res][res2]['result'] |
172 | rawlogs = ptestresult_get_rawlogs(seriesresults) | 220 | for logtype in rawlog_sections: |
173 | if rawlogs is not None: | 221 | logdata = generic_get_rawlogs(logtype, seriesresults) |
174 | with open(dst.replace(fn, "ptest-raw.log"), "w+") as f: | 222 | if logdata is not None: |
175 | f.write(rawlogs) | 223 | logger.info("Extracting " + rawlog_sections[logtype] + "-raw.log") |
224 | with open(dst.replace(fn, rawlog_sections[logtype] + "-raw.log"), "w+") as f: | ||
225 | f.write(logdata) | ||
176 | if 'ptestresult.sections' in seriesresults: | 226 | if 'ptestresult.sections' in seriesresults: |
177 | for i in seriesresults['ptestresult.sections']: | 227 | for i in seriesresults['ptestresult.sections']: |
178 | sectionlog = ptestresult_get_log(seriesresults, i) | 228 | sectionlog = ptestresult_get_log(seriesresults, i) |
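trim_durations() applies two different rounding rules that are easy to misread in the diff: durations above one second keep three decimal places, durations below one second keep four significant digits, and a value of exactly 1 is left untouched. A sketch with assumed example values:

    def trim(duration):
        # Mirrors the trimming rules introduced in trim_durations() above
        if duration > 1:
            return float("%.3f" % duration)
        elif duration < 1:
            return float("%.4g" % duration)
        return duration

    print(trim(12.3456789))    # 12.346     (three decimal places)
    print(trim(0.000123456))   # 0.0001235  (four significant digits)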
diff --git a/scripts/lib/resulttool/store.py b/scripts/lib/resulttool/store.py
index e0951f0a8f..b143334e69 100644
--- a/scripts/lib/resulttool/store.py
+++ b/scripts/lib/resulttool/store.py
@@ -65,18 +65,35 @@ def store(args, logger): | |||
65 | 65 | ||
66 | for r in revisions: | 66 | for r in revisions: |
67 | results = revisions[r] | 67 | results = revisions[r] |
68 | if args.revision and r[0] != args.revision: | ||
69 | logger.info('skipping %s as non-matching' % r[0]) | ||
70 | continue | ||
68 | keywords = {'commit': r[0], 'branch': r[1], "commit_count": r[2]} | 71 | keywords = {'commit': r[0], 'branch': r[1], "commit_count": r[2]} |
69 | subprocess.check_call(["find", tempdir, "!", "-path", "./.git/*", "-delete"]) | 72 | subprocess.check_call(["find", tempdir, "-name", "testresults.json", "!", "-path", "./.git/*", "-delete"]) |
70 | resultutils.save_resultsdata(results, tempdir, ptestlogs=True) | 73 | resultutils.save_resultsdata(results, tempdir, ptestlogs=True) |
71 | 74 | ||
72 | logger.info('Storing test result into git repository %s' % args.git_dir) | 75 | logger.info('Storing test result into git repository %s' % args.git_dir) |
73 | 76 | ||
74 | gitarchive.gitarchive(tempdir, args.git_dir, False, False, | 77 | excludes = [] |
78 | if args.logfile_archive: | ||
79 | excludes = ['*.log', "*.log.zst"] | ||
80 | |||
81 | tagname = gitarchive.gitarchive(tempdir, args.git_dir, False, False, | ||
75 | "Results of {branch}:{commit}", "branch: {branch}\ncommit: {commit}", "{branch}", | 82 | "Results of {branch}:{commit}", "branch: {branch}\ncommit: {commit}", "{branch}", |
76 | False, "{branch}/{commit_count}-g{commit}/{tag_number}", | 83 | False, "{branch}/{commit_count}-g{commit}/{tag_number}", |
77 | 'Test run #{tag_number} of {branch}:{commit}', '', | 84 | 'Test run #{tag_number} of {branch}:{commit}', '', |
78 | [], [], False, keywords, logger) | 85 | excludes, [], False, keywords, logger) |
79 | 86 | ||
87 | if args.logfile_archive: | ||
88 | logdir = args.logfile_archive + "/" + tagname | ||
89 | shutil.copytree(tempdir, logdir) | ||
90 | os.chmod(logdir, 0o755) | ||
91 | for root, dirs, files in os.walk(logdir): | ||
92 | for name in files: | ||
93 | if not name.endswith(".log"): | ||
94 | continue | ||
95 | f = os.path.join(root, name) | ||
96 | subprocess.run(["zstd", f, "--rm"], check=True, capture_output=True) | ||
80 | finally: | 97 | finally: |
81 | subprocess.check_call(["rm", "-rf", tempdir]) | 98 | subprocess.check_call(["rm", "-rf", tempdir]) |
82 | 99 | ||
@@ -102,3 +119,7 @@ def register_commands(subparsers): | |||
102 | help='add executed-by configuration to each result file') | 119 | help='add executed-by configuration to each result file') |
103 | parser_build.add_argument('-t', '--extra-test-env', default='', | 120 | parser_build.add_argument('-t', '--extra-test-env', default='', |
104 | help='add extra test environment data to each result file configuration') | 121 | help='add extra test environment data to each result file configuration') |
122 | parser_build.add_argument('-r', '--revision', default='', | ||
123 | help='only store data for the specified revision') | ||
124 | parser_build.add_argument('-l', '--logfile-archive', default='', | ||
125 | help='directory to separately archive log files along with a copy of the results') | ||
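With the new -l/--logfile-archive option, the stored results are copied out of the temporary work tree (the *.log / *.log.zst files are excluded from the git archive itself) and every .log is then compressed in place with zstd --rm; -r/--revision restricts storing to a single matching revision. A standalone sketch of the compression pass, assuming the zstd CLI is on PATH:

    import os
    import subprocess

    def compress_logs(logdir):
        # Compress each .log in place; --rm removes the original once zstd succeeds
        for root, dirs, files in os.walk(logdir):
            for name in files:
                if name.endswith(".log"):
                    subprocess.run(["zstd", os.path.join(root, name), "--rm"],
                                   check=True, capture_output=True)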