scripts/resulttool/regression: add metadata filtering for oeselftest

When generating regression reports, many false positives can be observed because some test results are compared even though the corresponding test sets are not the same, as can be seen for example with oeselftest tests (oeselftest is run multiple times but with different parameters, resulting in different test sets). Add a filtering mechanism in the resulttool regression module to enable better matching between tests. The METADATA_MATCH_TABLE defines that when the TEST_TYPE is "oeselftest", resulttool should filter pairs based on the OESELFTEST_METADATA appended to the test configuration. If metadata is absent from test results, in order to keep compatibility with older results, add a "guessing" mechanism to generate the missing OESELFTEST_METADATA. The guessed data is tightly coupled to the autobuilder configuration, where all oe-selftest executions are described. (From OE-Core rev: 94ab7c2b892bf292dd86619ca9c63ddd7bf53f3c) Signed-off-by: Alexis Lothoré <alexis.lothore@bootlin.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
author: Alexis Lothoré <alexis.lothore@bootlin.com> 2023-02-24 17:45:52 +0100
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2023-02-26 11:59:52 +0000
commit: 383cd86595c84a6b6e78bc9d15f10052d60d8193 (patch)
tree: e45d30571f21a78297a0daf8a48b4435657107a4 /scripts
parent: 88345ecf477f095ff7f8c4d22f4a919fa87ab116 (diff)
download: poky-383cd86595c84a6b6e78bc9d15f10052d60d8193.tar.gz
1 files changed, 162 insertions, 0 deletions
diff --git a/scripts/lib/resulttool/regression.py b/scripts/lib/resulttool/regression.py
index d0b0c31805..1b0c8335a3 100644
--- a/scripts/lib/resulttool/regression.py
+++ b/scripts/lib/resulttool/regression.py
@@ -11,6 +11,164 @@ import resulttool.resultutils as resultutils
 from oeqa.utils.git import GitRepo
 import oeqa.utils.gitarchive as gitarchive
+# Maps a TEST_TYPE value to the configuration key whose content must be equal
+# in base and target configurations (see metadata_matches()).
+METADATA_MATCH_TABLE = {
+    "oeselftest": "OESELFTEST_METADATA",
+}
+# Candidate OESELFTEST_METADATA records, keyed by a descriptive name.
+# guess_oeselftest_metadata() returns one of these entries for oeselftest
+# results which lack the metadata.
+OESELFTEST_METADATA_GUESS_TABLE = {
+    "trigger-build-posttrigger": {
+        "run_all_tests": False,
+        "run_tests": ["buildoptions.SourceMirroring.test_yocto_source_mirror"],
+        "skips": None,
+        "machine": None,
+        "select_tags": None,
+        "exclude_tags": None,
+    },
+    "reproducible": {
+        "run_all_tests": False,
+        "run_tests": ["reproducible"],
+        "skips": None,
+        "machine": None,
+        "select_tags": None,
+        "exclude_tags": None,
+    },
+    "arch-qemu-quick": {
+        "run_all_tests": True,
+        "run_tests": None,
+        "skips": None,
+        "machine": None,
+        "select_tags": ["machine"],
+        "exclude_tags": None,
+    },
+    "arch-qemu-full-x86-or-x86_64": {
+        "run_all_tests": True,
+        "run_tests": None,
+        "skips": None,
+        "machine": None,
+        "select_tags": ["machine", "toolchain-system"],
+        "exclude_tags": None,
+    },
+    "arch-qemu-full-others": {
+        "run_all_tests": True,
+        "run_tests": None,
+        "skips": None,
+        "machine": None,
+        "select_tags": ["machine", "toolchain-user"],
+        "exclude_tags": None,
+    },
+    "selftest": {
+        "run_all_tests": True,
+        "run_tests": None,
+        "skips": [
+            "distrodata.Distrodata.test_checkpkg",
+            "buildoptions.SourceMirroring.test_yocto_source_mirror",
+            "reproducible",
+        ],
+        "machine": None,
+        "select_tags": None,
+        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"],
+    },
+    "bringup": {
+        "run_all_tests": True,
+        "run_tests": None,
+        "skips": [
+            "distrodata.Distrodata.test_checkpkg",
+            "buildoptions.SourceMirroring.test_yocto_source_mirror",
+        ],
+        "machine": None,
+        "select_tags": None,
+        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"],
+    },
+}
+def test_has_at_least_one_matching_tag(test, tag_list):
+    """
+    Return True if the test result has an "oetags" entry containing at least
+    one tag which is also present in tag_list, False otherwise.
+    """
+    if "oetags" not in test:
+        return False
+    for oetag in test["oetags"]:
+        if oetag in tag_list:
+            return True
+    return False
+def all_tests_have_at_least_one_matching_tag(results, tag_list):
+    """
+    Return True if every entry of results either has at least one tag from
+    tag_list or has a name starting with "ptestresult". An empty results
+    mapping also yields True.
+    """
+    for test_name, test_result in results.items():
+        if test_has_at_least_one_matching_tag(test_result, tag_list):
+            continue
+        if not test_name.startswith("ptestresult"):
+            return False
+    return True
+def any_test_have_any_matching_tag(results, tag_list):
+    """
+    Return True if at least one test result of results has a tag which is
+    present in tag_list, False otherwise (including for empty results).
+    """
+    for test_result in results.values():
+        if test_has_at_least_one_matching_tag(test_result, tag_list):
+            return True
+    return False
+def have_skipped_test(result, test_prefix):
+    """
+    Return True if every test of result whose name starts with test_prefix
+    has the "SKIPPED" status. This is also True when no test name starts
+    with test_prefix.
+    """
+    for test_name in result:
+        if not test_name.startswith(test_prefix):
+            continue
+        if result[test_name]['status'] != "SKIPPED":
+            return False
+    return True
+def have_all_tests_skipped(result, test_prefixes_list):
+    """
+    Return True if have_skipped_test() holds for each prefix of
+    test_prefixes_list, False as soon as one prefix does not satisfy it.
+    """
+    for test_prefix in test_prefixes_list:
+        if not have_skipped_test(result, test_prefix):
+            return False
+    return True
+def guess_oeselftest_metadata(results):
+    """
+    When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content.
+    Check results (a mapping of test name to test result) for specific values (absence/presence of oetags,
+    number and name of executed tests...), and if it matches one of known configuration from autobuilder
+    configuration, return the corresponding OESELFTEST_METADATA_GUESS_TABLE entry to allow proper test filtering.
+    Return None if results is empty or does not match any known configuration.
+    The returned dict is the table entry itself, not a copy: callers must not modify it.
+    This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less,
+    as new tests will have OESELFTEST_METADATA properly appended at test reporting time
+    """
+    # Nothing can be inferred from an empty result set. Without this guard the
+    # all() checks below are vacuously true and would wrongly report "reproducible".
+    if not results:
+        return None
+    source_mirror_test = "buildoptions.SourceMirroring.test_yocto_source_mirror"
+    bringup_skips = ["distrodata.Distrodata.test_checkpkg", source_mirror_test]
+    selftest_skips = bringup_skips + ["reproducible"]
+    # The order of the checks matters: the most restrictive tag lists and skip lists are tried first.
+    if len(results) == 1 and source_mirror_test in results:
+        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
+    elif all(result.startswith("reproducible") for result in results):
+        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
+    elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
+        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
+    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
+        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
+    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
+        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
+    elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
+        if have_all_tests_skipped(results, selftest_skips):
+            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
+        elif have_all_tests_skipped(results, bringup_skips):
+            return OESELFTEST_METADATA_GUESS_TABLE['bringup']
+    return None
+def metadata_matches(base_configuration, target_configuration):
+    """
+    Tell whether base and target carry the same test type specific metadata.
+    The TEST_TYPE of base selects, through METADATA_MATCH_TABLE, the
+    configuration key to compare. Return True if base has no TEST_TYPE or a
+    TEST_TYPE which is not listed in the table, or if both configurations
+    hold equal values for the selected key (a key missing on both sides
+    counts as equal). Return False otherwise, which includes the case where
+    only one of the two configurations has the metadata.
+    """
+    test_type = base_configuration.get('TEST_TYPE')
+    if test_type in METADATA_MATCH_TABLE:
+        metadata_key = METADATA_MATCH_TABLE[test_type]
+        return base_configuration.get(metadata_key) == target_configuration.get(metadata_key)
+    return True
+def machine_matches(base_configuration, target_configuration):
+    """
+    Return True if base and target configurations hold the same MACHINE
+    value. A MACHINE missing on both sides counts as a match.
+    """
+    base_machine = base_configuration.get('MACHINE')
+    target_machine = target_configuration.get('MACHINE')
+    return base_machine == target_machine
+def _add_guessed_oeselftest_metadata(logger, testrun):
+    """
+    If testrun is an oeselftest run whose configuration lacks
+    OESELFTEST_METADATA, try to guess it from the run results and store the
+    guess in the configuration. Log an error if no guess can be made.
+    """
+    configuration = testrun['configuration']
+    if configuration.get('TEST_TYPE') != 'oeselftest' or 'OESELFTEST_METADATA' in configuration:
+        return
+    # STARTTIME only identifies the run in log messages: do not fail if it is missing
+    start_time = configuration.get('STARTTIME')
+    guess = guess_oeselftest_metadata(testrun['result'])
+    if guess is None:
+        logger.error("ERROR: did not manage to guess oeselftest metadata for %s", start_time)
+    else:
+        logger.debug("Enriching %s with %s", start_time, guess)
+        configuration['OESELFTEST_METADATA'] = guess
+def can_be_compared(logger, base, target):
+    """
+    Some tests are not relevant to be compared, for example some oeselftest
+    run with different tests sets or parameters. Return true if tests can be
+    compared, i.e. if both the test type specific metadata and the MACHINE of
+    base and target match.
+    As a side effect, the configuration of base and/or target is enriched
+    with a guessed OESELFTEST_METADATA when it is an oeselftest run lacking
+    it and a guess can be made.
+    """
+    # Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on tests results.
+    _add_guessed_oeselftest_metadata(logger, base)
+    _add_guessed_oeselftest_metadata(logger, target)
+    base_configuration = base['configuration']
+    target_configuration = target['configuration']
+    return metadata_matches(base_configuration, target_configuration) \
+        and machine_matches(base_configuration, target_configuration)
 def compare_result(logger, base_name, target_name, base_result, target_result):
    base_result = base_result.get('result')
    target_result = target_result.get('result')
@@ -61,6 +219,8 @@ def regression_common(args, logger, base_results, target_results):
            # removing any pairs which match
            for c in base.copy():
                for b in target.copy():
+                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
+                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
                    if not res:
                        matches.append(resstr)
@@ -70,6 +230,8 @@ def regression_common(args, logger, base_results, target_results):
            # Should only now see regressions, we may not be able to match multiple pairs directly
            for c in base:
                for b in target:
+                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
+                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
                    if res:
                        regressions.append(resstr)
author	Alexis Lothoré <alexis.lothore@bootlin.com>	2023-02-24 17:45:52 +0100
committer	Richard Purdie <richard.purdie@linuxfoundation.org>	2023-02-26 11:59:52 +0000
commit	383cd86595c84a6b6e78bc9d15f10052d60d8193 (patch)
tree	e45d30571f21a78297a0daf8a48b4435657107a4 /scripts
parent	88345ecf477f095ff7f8c4d22f4a919fa87ab116 (diff)
download	poky-383cd86595c84a6b6e78bc9d15f10052d60d8193.tar.gz

diff --git a/scripts/lib/resulttool/regression.py b/scripts/lib/resulttool/regression.py index d0b0c31805..1b0c8335a3 100644 --- a/scripts/lib/resulttool/regression.py +++ b/scripts/lib/resulttool/regression.py
@@ -11,6 +11,164 @@ import resulttool.resultutils as resultutils
11	from oeqa.utils.git import GitRepo	11	from oeqa.utils.git import GitRepo
12	import oeqa.utils.gitarchive as gitarchive	12	import oeqa.utils.gitarchive as gitarchive
13		13
		14	METADATA_MATCH_TABLE = {
		15	"oeselftest": "OESELFTEST_METADATA"
		16	}
		17
		18	OESELFTEST_METADATA_GUESS_TABLE={
		19	"trigger-build-posttrigger": {
		20	"run_all_tests": False,
		21	"run_tests":["buildoptions.SourceMirroring.test_yocto_source_mirror"],
		22	"skips": None,
		23	"machine": None,
		24	"select_tags":None,
		25	"exclude_tags": None
		26	},
		27	"reproducible": {
		28	"run_all_tests": False,
		29	"run_tests":["reproducible"],
		30	"skips": None,
		31	"machine": None,
		32	"select_tags":None,
		33	"exclude_tags": None
		34	},
		35	"arch-qemu-quick": {
		36	"run_all_tests": True,
		37	"run_tests":None,
		38	"skips": None,
		39	"machine": None,
		40	"select_tags":["machine"],
		41	"exclude_tags": None
		42	},
		43	"arch-qemu-full-x86-or-x86_64": {
		44	"run_all_tests": True,
		45	"run_tests":None,
		46	"skips": None,
		47	"machine": None,
		48	"select_tags":["machine", "toolchain-system"],
		49	"exclude_tags": None
		50	},
		51	"arch-qemu-full-others": {
		52	"run_all_tests": True,
		53	"run_tests":None,
		54	"skips": None,
		55	"machine": None,
		56	"select_tags":["machine", "toolchain-user"],
		57	"exclude_tags": None
		58	},
		59	"selftest": {
		60	"run_all_tests": True,
		61	"run_tests":None,
		62	"skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
		63	"machine": None,
		64	"select_tags":None,
		65	"exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
		66	},
		67	"bringup": {
		68	"run_all_tests": True,
		69	"run_tests":None,
		70	"skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
		71	"machine": None,
		72	"select_tags":None,
		73	"exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
		74	}
		75	}
		76
		77	def test_has_at_least_one_matching_tag(test, tag_list):
		78	return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])
		79
		80	def all_tests_have_at_least_one_matching_tag(results, tag_list):
		81	return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items())
		82
		83	def any_test_have_any_matching_tag(results, tag_list):
		84	return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values())
		85
		86	def have_skipped_test(result, test_prefix):
		87	return all( result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix))
		88
		89	def have_all_tests_skipped(result, test_prefixes_list):
		90	return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list)
		91
		92	def guess_oeselftest_metadata(results):
		93	"""
		94	When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content.
		95	Check results for specific values (absence/presence of oetags, number and name of executed tests...),
		96	and if it matches one of known configuration from autobuilder configuration, apply guessed OSELFTEST_METADATA
		97	to it to allow proper test filtering.
		98	This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less,
		99	as new tests will have OESELFTEST_METADATA properly appended at test reporting time
		100	"""
		101
		102	if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
		103	return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
		104	elif all(result.startswith("reproducible") for result in results):
		105	return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
		106	elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
		107	return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
		108	elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
		109	return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
		110	elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
		111	return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
		112	elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
		113	if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
		114	return OESELFTEST_METADATA_GUESS_TABLE['selftest']
		115	elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
		116	return OESELFTEST_METADATA_GUESS_TABLE['bringup']
		117
		118	return None
		119
		120
		121	def metadata_matches(base_configuration, target_configuration):
		122	"""
		123	For passed base and target, check test type. If test type matches one of
		124	properties described in METADATA_MATCH_TABLE, compare metadata if it is
		125	present in base. Return true if metadata matches, or if base lacks some
		126	data (either TEST_TYPE or the corresponding metadata)
		127	"""
		128	test_type = base_configuration.get('TEST_TYPE')
		129	if test_type not in METADATA_MATCH_TABLE:
		130	return True
		131
		132	metadata_key = METADATA_MATCH_TABLE.get(test_type)
		133	if target_configuration.get(metadata_key) != base_configuration.get(metadata_key):
		134	return False
		135
		136	return True
		137
		138
		139	def machine_matches(base_configuration, target_configuration):
		140	return base_configuration.get('MACHINE') == target_configuration.get('MACHINE')
		141
		142
		143	def can_be_compared(logger, base, target):
		144	"""
		145	Some tests are not relevant to be compared, for example some oeselftest
		146	run with different tests sets or parameters. Return true if tests can be
		147	compared
		148	"""
		149	base_configuration = base['configuration']
		150	target_configuration = target['configuration']
		151
		152	# Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on tests results.
		153	if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration:
		154	guess = guess_oeselftest_metadata(base['result'])
		155	if guess is None:
		156	logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}")
		157	else:
		158	logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}")
		159	base_configuration['OESELFTEST_METADATA'] = guess
		160	if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration:
		161	guess = guess_oeselftest_metadata(target['result'])
		162	if guess is None:
		163	logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}")
		164	else:
		165	logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}")
		166	target_configuration['OESELFTEST_METADATA'] = guess
		167
		168	return metadata_matches(base_configuration, target_configuration) \
		169	and machine_matches(base_configuration, target_configuration)
		170
		171
14	def compare_result(logger, base_name, target_name, base_result, target_result):	172	def compare_result(logger, base_name, target_name, base_result, target_result):
15	base_result = base_result.get('result')	173	base_result = base_result.get('result')
16	target_result = target_result.get('result')	174	target_result = target_result.get('result')
@@ -61,6 +219,8 @@ def regression_common(args, logger, base_results, target_results):
61	# removing any pairs which match	219	# removing any pairs which match
62	for c in base.copy():	220	for c in base.copy():
63	for b in target.copy():	221	for b in target.copy():
		222	if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
		223	continue
64	res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])	224	res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
65	if not res:	225	if not res:
66	matches.append(resstr)	226	matches.append(resstr)
@@ -70,6 +230,8 @@ def regression_common(args, logger, base_results, target_results):
70	# Should only now see regressions, we may not be able to match multiple pairs directly	230	# Should only now see regressions, we may not be able to match multiple pairs directly
71	for c in base:	231	for c in base:
72	for b in target:	232	for b in target:
		233	if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
		234	continue
73	res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])	235	res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
74	if res:	236	if res:
75	regressions.append(resstr)	237	regressions.append(resstr)