diff options
author | Alexis Lothoré <alexis.lothore@bootlin.com> | 2023-02-24 17:45:52 +0100 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2023-02-26 11:59:52 +0000 |
commit | 383cd86595c84a6b6e78bc9d15f10052d60d8193 (patch) | |
tree | e45d30571f21a78297a0daf8a48b4435657107a4 /scripts | |
parent | 88345ecf477f095ff7f8c4d22f4a919fa87ab116 (diff) | |
download | poky-383cd86595c84a6b6e78bc9d15f10052d60d8193.tar.gz |
scripts/resulttool/regression: add metadata filtering for oeselftest
When generating regression reports, many false positives can be observed since
some test results are compared while the corresponding test sets are not the
same, as can be seen for example with oeselftest tests (oeselftest is run
multiple times but with different parameters, resulting in different test sets).
Add a filtering mechanism in the resulttool regression module to enable a better
matching between tests. The METADATA_MATCH_TABLE defines that when the TEST_TYPE
is "oeselftest", then resulttool should filter pairs based on the
OESELFTEST_METADATA appended to the test configuration. If the metadata is absent
from test results, in order to keep compatibility with older results, add a
"guessing" mechanism to generate the missing OESELFTEST_METADATA. The guessed
data is tightly coupled to the autobuilder configuration, where all oe-selftest
executions are described.
(From OE-Core rev: 94ab7c2b892bf292dd86619ca9c63ddd7bf53f3c)
Signed-off-by: Alexis Lothoré <alexis.lothore@bootlin.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/lib/resulttool/regression.py | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/scripts/lib/resulttool/regression.py b/scripts/lib/resulttool/regression.py index d0b0c31805..1b0c8335a3 100644 --- a/scripts/lib/resulttool/regression.py +++ b/scripts/lib/resulttool/regression.py | |||
@@ -11,6 +11,164 @@ import resulttool.resultutils as resultutils | |||
11 | from oeqa.utils.git import GitRepo | 11 | from oeqa.utils.git import GitRepo |
12 | import oeqa.utils.gitarchive as gitarchive | 12 | import oeqa.utils.gitarchive as gitarchive |
13 | 13 | ||
# For each TEST_TYPE listed here, the name of the configuration entry that
# must be equal in two test runs for them to be worth comparing.
METADATA_MATCH_TABLE = {
    "oeselftest": "OESELFTEST_METADATA",
}


def _oeselftest_metadata(*, run_all_tests, run_tests=None, skips=None,
                         select_tags=None, exclude_tags=None):
    """
    Build one OESELFTEST_METADATA entry. Every entry of the guess table uses
    the same six keys, in the same order, and never sets a machine.
    """
    return {
        "run_all_tests": run_all_tests,
        "run_tests": run_tests,
        "skips": skips,
        "machine": None,
        "select_tags": select_tags,
        "exclude_tags": exclude_tags,
    }


# Metadata applied to oeselftest results which lack OESELFTEST_METADATA,
# indexed by the name of the configuration the results are guessed to
# come from (see guess_oeselftest_metadata).
OESELFTEST_METADATA_GUESS_TABLE = {
    "trigger-build-posttrigger": _oeselftest_metadata(
        run_all_tests=False,
        run_tests=["buildoptions.SourceMirroring.test_yocto_source_mirror"],
    ),
    "reproducible": _oeselftest_metadata(
        run_all_tests=False,
        run_tests=["reproducible"],
    ),
    "arch-qemu-quick": _oeselftest_metadata(
        run_all_tests=True,
        select_tags=["machine"],
    ),
    "arch-qemu-full-x86-or-x86_64": _oeselftest_metadata(
        run_all_tests=True,
        select_tags=["machine", "toolchain-system"],
    ),
    "arch-qemu-full-others": _oeselftest_metadata(
        run_all_tests=True,
        select_tags=["machine", "toolchain-user"],
    ),
    "selftest": _oeselftest_metadata(
        run_all_tests=True,
        skips=[
            "distrodata.Distrodata.test_checkpkg",
            "buildoptions.SourceMirroring.test_yocto_source_mirror",
            "reproducible",
        ],
        exclude_tags=["machine", "toolchain-system", "toolchain-user"],
    ),
    "bringup": _oeselftest_metadata(
        run_all_tests=True,
        skips=[
            "distrodata.Distrodata.test_checkpkg",
            "buildoptions.SourceMirroring.test_yocto_source_mirror",
        ],
        exclude_tags=["machine", "toolchain-system", "toolchain-user"],
    ),
}
76 | |||
77 | def test_has_at_least_one_matching_tag(test, tag_list): | ||
78 | return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"]) | ||
79 | |||
def all_tests_have_at_least_one_matching_tag(results, tag_list):
    """
    Tell whether every test of results carries at least one tag of tag_list.
    Tests whose name starts with "ptestresult" are accepted even without a
    matching tag. An empty results dict yields True.
    """
    for test_name, test_result in results.items():
        if test_has_at_least_one_matching_tag(test_result, tag_list):
            continue
        if not test_name.startswith("ptestresult"):
            return False
    return True
82 | |||
def any_test_have_any_matching_tag(results, tag_list):
    """
    Tell whether at least one test of results carries at least one tag of
    tag_list. An empty results dict yields False.
    """
    for test_result in results.values():
        if test_has_at_least_one_matching_tag(test_result, tag_list):
            return True
    return False
85 | |||
def have_skipped_test(result, test_prefix):
    """
    Tell whether every test whose name starts with test_prefix has the
    "SKIPPED" status. Note that this is also True when no test name starts
    with test_prefix at all.
    """
    for test_name in result:
        if not test_name.startswith(test_prefix):
            continue
        if result[test_name]['status'] != "SKIPPED":
            return False
    return True
88 | |||
def have_all_tests_skipped(result, test_prefixes_list):
    """
    Tell whether have_skipped_test() holds for each prefix of
    test_prefixes_list. An empty prefix list yields True.
    """
    for test_prefix in test_prefixes_list:
        if not have_skipped_test(result, test_prefix):
            return False
    return True
91 | |||
def guess_oeselftest_metadata(results):
    """
    When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content.
    Check results for specific values (absence/presence of oetags, number and name of executed tests...),
    and if it matches one of known configuration from autobuilder configuration, apply guessed OESELFTEST_METADATA
    to it to allow proper test filtering.
    This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less,
    as new tests will have OESELFTEST_METADATA properly appended at test reporting time

    results is the dict of test results, indexed by test name. Return the matching entry of
    OESELFTEST_METADATA_GUESS_TABLE (the table entry itself, not a copy), or None if results is
    empty or does not match any known configuration.
    """
    # Every all()-based check below is vacuously true on an empty result set,
    # which would otherwise be reported as a "reproducible" run.
    if not results:
        return None

    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
    if all(test_name.startswith("reproducible") for test_name in results):
        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']

    # The narrowest tag list must be tried first: results where every test is
    # tagged "machine" would match the two wider lists as well.
    if all_tests_have_at_least_one_matching_tag(results, ["machine"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
    if all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
    if all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']

    if any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        return None

    # No tagged test at all: tell "selftest" from "bringup" by the skipped tests.
    # The longest skip list must be tried first since it includes the other one.
    if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
        return OESELFTEST_METADATA_GUESS_TABLE['selftest']
    if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
        return OESELFTEST_METADATA_GUESS_TABLE['bringup']

    return None
119 | |||
120 | |||
def metadata_matches(base_configuration, target_configuration):
    """
    Tell whether base and target hold the same test type specific metadata.
    The configuration entry to compare is looked up in METADATA_MATCH_TABLE
    from the TEST_TYPE of base. Return True if base has no TEST_TYPE, or a
    TEST_TYPE which is not listed in the table. Otherwise, return True only
    if both configurations hold equal values for this entry: an entry missing
    on one side is read as None, so it does not match a value set on the
    other side.
    """
    base_test_type = base_configuration.get('TEST_TYPE')
    if base_test_type in METADATA_MATCH_TABLE:
        key = METADATA_MATCH_TABLE[base_test_type]
        return target_configuration.get(key) == base_configuration.get(key)

    # Nothing to compare for this test type
    return True
137 | |||
138 | |||
def machine_matches(base_configuration, target_configuration):
    """
    Tell whether base and target have been run for the same MACHINE. A
    missing MACHINE entry is read as None, so two configurations both
    lacking it match.
    """
    base_machine = base_configuration.get('MACHINE')
    target_machine = target_configuration.get('MACHINE')
    return base_machine == target_machine
141 | |||
142 | |||
def _add_guessed_oeselftest_metadata(logger, test_run):
    """
    Older test results lack proper OESELFTEST_METADATA: if test_run is an
    oeselftest run without it, try to guess it based on tests results and
    store the guess in the configuration of test_run (modified in place).
    Log an error and leave the configuration untouched if guessing fails.
    """
    configuration = test_run['configuration']
    if configuration.get('TEST_TYPE') != 'oeselftest' or 'OESELFTEST_METADATA' in configuration:
        return

    guess = guess_oeselftest_metadata(test_run['result'])
    # STARTTIME is only used to name the run in logs: its absence must not
    # make the comparison fail.
    start_time = configuration.get('STARTTIME')
    if guess is None:
        logger.error(f"ERROR: did not manage to guess oeselftest metadata for {start_time}")
    else:
        logger.debug(f"Enriching {start_time} with {guess}")
        configuration['OESELFTEST_METADATA'] = guess


def can_be_compared(logger, base, target):
    """
    Some tests are not relevant to be compared, for example some oeselftest
    run with different tests sets or parameters. Return true if tests can be
    compared, i.e. if both their metadata and their machine match.

    base and target are test runs holding a 'configuration' dict and, for
    oeselftest runs lacking OESELFTEST_METADATA, a 'result' dict. As a side
    effect, guessed OESELFTEST_METADATA may be added to their configuration.
    """
    base_configuration = base['configuration']
    target_configuration = target['configuration']

    _add_guessed_oeselftest_metadata(logger, base)
    _add_guessed_oeselftest_metadata(logger, target)

    return (metadata_matches(base_configuration, target_configuration)
            and machine_matches(base_configuration, target_configuration))
170 | |||
171 | |||
14 | def compare_result(logger, base_name, target_name, base_result, target_result): | 172 | def compare_result(logger, base_name, target_name, base_result, target_result): |
15 | base_result = base_result.get('result') | 173 | base_result = base_result.get('result') |
16 | target_result = target_result.get('result') | 174 | target_result = target_result.get('result') |
@@ -61,6 +219,8 @@ def regression_common(args, logger, base_results, target_results): | |||
61 | # removing any pairs which match | 219 | # removing any pairs which match |
62 | for c in base.copy(): | 220 | for c in base.copy(): |
63 | for b in target.copy(): | 221 | for b in target.copy(): |
222 | if not can_be_compared(logger, base_results[a][c], target_results[a][b]): | ||
223 | continue | ||
64 | res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b]) | 224 | res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b]) |
65 | if not res: | 225 | if not res: |
66 | matches.append(resstr) | 226 | matches.append(resstr) |
@@ -70,6 +230,8 @@ def regression_common(args, logger, base_results, target_results): | |||
70 | # Should only now see regressions, we may not be able to match multiple pairs directly | 230 | # Should only now see regressions, we may not be able to match multiple pairs directly |
71 | for c in base: | 231 | for c in base: |
72 | for b in target: | 232 | for b in target: |
233 | if not can_be_compared(logger, base_results[a][c], target_results[a][b]): | ||
234 | continue | ||
73 | res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b]) | 235 | res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b]) |
74 | if res: | 236 | if res: |
75 | regressions.append(resstr) | 237 | regressions.append(resstr) |