summaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
authorAlexis Lothoré <alexis.lothore@bootlin.com>2023-02-24 17:45:52 +0100
committerRichard Purdie <richard.purdie@linuxfoundation.org>2023-02-26 11:59:52 +0000
commit383cd86595c84a6b6e78bc9d15f10052d60d8193 (patch)
treee45d30571f21a78297a0daf8a48b4435657107a4 /scripts
parent88345ecf477f095ff7f8c4d22f4a919fa87ab116 (diff)
downloadpoky-383cd86595c84a6b6e78bc9d15f10052d60d8193.tar.gz
scripts/resulttool/regression: add metadata filtering for oeselftest
When generating regression reports, many false positives can be observed since some test results are compared while the corresponding test sets are not the same, as can be seen for example with oeselftest tests (oeselftest is run multiple times but with different parameters, resulting in different test sets).

Add a filtering mechanism in the resulttool regression module to enable a better matching between tests. The METADATA_MATCH_TABLE defines that when the TEST_TYPE is "oeselftest", then resulttool should filter pairs based on the OESELFTEST_METADATA appended to the test configuration. If metadata is absent from test results, in order to keep compatibility with older results, add a "guessing" mechanism to generate the missing OESELFTEST_METADATA. The guessed data is tightly coupled to the autobuilder configuration, where all oe-selftest executions are described.

(From OE-Core rev: 94ab7c2b892bf292dd86619ca9c63ddd7bf53f3c)

Signed-off-by: Alexis Lothoré <alexis.lothore@bootlin.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'scripts')
-rw-r--r--scripts/lib/resulttool/regression.py162
1 files changed, 162 insertions, 0 deletions
diff --git a/scripts/lib/resulttool/regression.py b/scripts/lib/resulttool/regression.py
index d0b0c31805..1b0c8335a3 100644
--- a/scripts/lib/resulttool/regression.py
+++ b/scripts/lib/resulttool/regression.py
@@ -11,6 +11,164 @@ import resulttool.resultutils as resultutils
11from oeqa.utils.git import GitRepo 11from oeqa.utils.git import GitRepo
12import oeqa.utils.gitarchive as gitarchive 12import oeqa.utils.gitarchive as gitarchive
13 13
# Maps a TEST_TYPE value to the configuration key holding the metadata which
# must be identical in two test runs of that type.
METADATA_MATCH_TABLE = {"oeselftest": "OESELFTEST_METADATA"}
17
# OESELFTEST_METADATA values to apply to older oeselftest results which lack
# them. Each entry is keyed by a label identifying one known oe-selftest
# configuration; this data is tightly coupled to the autobuilder configuration.
OESELFTEST_METADATA_GUESS_TABLE = {
    # Single-test run: only the source mirror test is executed.
    "trigger-build-posttrigger": {
        "run_all_tests": False,
        "run_tests": ["buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None,
    },
    # Only the reproducible tests are executed.
    "reproducible": {
        "run_all_tests": False,
        "run_tests": ["reproducible"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None,
    },
    # All tests, restricted to the ones tagged "machine".
    "arch-qemu-quick": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine"],
        "exclude_tags": None,
    },
    # All tests, restricted to the "machine" and "toolchain-system" tags.
    "arch-qemu-full-x86-or-x86_64": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-system"],
        "exclude_tags": None,
    },
    # All tests, restricted to the "machine" and "toolchain-user" tags.
    "arch-qemu-full-others": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-user"],
        "exclude_tags": None,
    },
    # All untagged tests, minus three explicitly skipped tests.
    "selftest": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": [
            "distrodata.Distrodata.test_checkpkg",
            "buildoptions.SourceMirroring.test_yocto_source_mirror",
            "reproducible",
        ],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"],
    },
    # Same as "selftest", except that the reproducible tests are not skipped.
    "bringup": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": [
            "distrodata.Distrodata.test_checkpkg",
            "buildoptions.SourceMirroring.test_yocto_source_mirror",
        ],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"],
    },
}
76
77def test_has_at_least_one_matching_tag(test, tag_list):
78 return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])
79
def all_tests_have_at_least_one_matching_tag(results, tag_list):
    """
    Return True if every test in results has at least one tag from tag_list.
    Tests whose name starts with "ptestresult" are exempted from the check.
    An empty results set yields True.
    """
    for test_name, test_result in results.items():
        if test_has_at_least_one_matching_tag(test_result, tag_list):
            continue
        if not test_name.startswith("ptestresult"):
            return False
    return True
82
def any_test_have_any_matching_tag(results, tag_list):
    """
    Return True as soon as one test in results has at least one tag from
    tag_list, False if none has (or if results is empty)
    """
    for test in results.values():
        if test_has_at_least_one_matching_tag(test, tag_list):
            return True
    return False
85
def have_skipped_test(result, test_prefix):
    """
    Return True if every test whose name starts with test_prefix has the
    "SKIPPED" status. Note that this is also True when no test name starts
    with test_prefix.
    """
    for test_name, test_data in result.items():
        if test_name.startswith(test_prefix) and test_data['status'] != "SKIPPED":
            return False
    return True
88
def have_all_tests_skipped(result, test_prefixes_list):
    """
    Return True if, for each prefix of test_prefixes_list, have_skipped_test()
    holds on result. An empty prefixes list yields True.
    """
    for test_prefix in test_prefixes_list:
        if not have_skipped_test(result, test_prefix):
            return False
    return True
91
def guess_oeselftest_metadata(results):
    """
    When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content.
    Check results for specific values (absence/presence of oetags, number and name of executed tests...),
    and if it matches one of known configuration from autobuilder configuration, apply guessed OESELFTEST_METADATA
    to it to allow proper test filtering.
    This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less,
    as new tests will have OESELFTEST_METADATA properly appended at test reporting time

    Return the matching entry of OESELFTEST_METADATA_GUESS_TABLE (the table entry itself, not a copy),
    or None if no known configuration matches or if results is empty.
    """

    # The checks below rely on all(), which is vacuously true on an empty
    # iterable: without this guard, an empty results set would wrongly be
    # identified as a "reproducible" run.
    if not results:
        return None

    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
    if all(result.startswith("reproducible") for result in results):
        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']

    # Tag-based checks go from the most restrictive tag list to wider ones,
    # so that a "machine"-only run is not reported as one of the "full" runs.
    if all_tests_have_at_least_one_matching_tag(results, ["machine"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
    if all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
    if all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']

    if not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        # Untagged runs only differ by their skipped tests: check the longest
        # skip list first, since it includes the shorter one.
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
            return OESELFTEST_METADATA_GUESS_TABLE['bringup']

    return None
119
120
def metadata_matches(base_configuration, target_configuration):
    """
    Check whether base and target carry the same test type specific metadata.

    The test type is read from base only. If base has no TEST_TYPE, or if its
    test type is not listed in METADATA_MATCH_TABLE, there is nothing to
    compare and True is returned. Otherwise, return True only if the metadata
    stored under the key associated to the test type is equal in base and
    target. Metadata absent from a configuration is read as None: two
    configurations both lacking it match, but a configuration lacking it
    does not match a configuration providing it.
    """
    test_type = base_configuration.get('TEST_TYPE')
    if test_type not in METADATA_MATCH_TABLE:
        return True

    metadata_key = METADATA_MATCH_TABLE[test_type]
    return target_configuration.get(metadata_key) == base_configuration.get(metadata_key)
137
138
def machine_matches(base_configuration, target_configuration):
    """
    Return True if base and target have been run for the same MACHINE. Two
    configurations both lacking MACHINE are considered as matching.
    """
    base_machine = base_configuration.get('MACHINE')
    target_machine = target_configuration.get('MACHINE')
    return base_machine == target_machine
141
142
def _guess_missing_oeselftest_metadata(logger, test_run):
    """
    Add a guessed OESELFTEST_METADATA to the configuration of an oeselftest
    run which lacks it. Runs of another type, or already carrying the
    metadata, are left untouched. If guessing fails, an error is logged and
    the configuration is left untouched as well.
    """
    configuration = test_run['configuration']
    if configuration.get('TEST_TYPE') != 'oeselftest' or 'OESELFTEST_METADATA' in configuration:
        return

    # STARTTIME is only used to identify the run in logs: read it with get()
    # so that a run lacking it does not abort the whole comparison.
    start_time = configuration.get('STARTTIME')
    guess = guess_oeselftest_metadata(test_run['result'])
    if guess is None:
        logger.error(f"ERROR: did not manage to guess oeselftest metadata for {start_time}")
    else:
        logger.debug(f"Enriching {start_time} with {guess}")
        configuration['OESELFTEST_METADATA'] = guess


def can_be_compared(logger, base, target):
    """
    Some tests are not relevant to be compared, for example some oeselftest
    run with different tests sets or parameters. Return true if tests can be
    compared, i.e. if both their metadata and their MACHINE match.

    As a side effect, the configuration of base and/or target is enriched
    with a guessed OESELFTEST_METADATA when it is an oeselftest run lacking it.
    """
    # Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on tests results.
    _guess_missing_oeselftest_metadata(logger, base)
    _guess_missing_oeselftest_metadata(logger, target)

    base_configuration = base['configuration']
    target_configuration = target['configuration']
    return metadata_matches(base_configuration, target_configuration) \
        and machine_matches(base_configuration, target_configuration)
170
171
14def compare_result(logger, base_name, target_name, base_result, target_result): 172def compare_result(logger, base_name, target_name, base_result, target_result):
15 base_result = base_result.get('result') 173 base_result = base_result.get('result')
16 target_result = target_result.get('result') 174 target_result = target_result.get('result')
@@ -61,6 +219,8 @@ def regression_common(args, logger, base_results, target_results):
61 # removing any pairs which match 219 # removing any pairs which match
62 for c in base.copy(): 220 for c in base.copy():
63 for b in target.copy(): 221 for b in target.copy():
222 if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
223 continue
64 res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b]) 224 res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
65 if not res: 225 if not res:
66 matches.append(resstr) 226 matches.append(resstr)
@@ -70,6 +230,8 @@ def regression_common(args, logger, base_results, target_results):
70 # Should only now see regressions, we may not be able to match multiple pairs directly 230 # Should only now see regressions, we may not be able to match multiple pairs directly
71 for c in base: 231 for c in base:
72 for b in target: 232 for b in target:
233 if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
234 continue
73 res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b]) 235 res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
74 if res: 236 if res:
75 regressions.append(resstr) 237 regressions.append(resstr)