Diffstat (limited to 'scripts/lib/resulttool/regression.py')
-rw-r--r--  scripts/lib/resulttool/regression.py  281
1 file changed, 271 insertions, 10 deletions
diff --git a/scripts/lib/resulttool/regression.py b/scripts/lib/resulttool/regression.py
index 9f952951b3..10e7d13841 100644
--- a/scripts/lib/resulttool/regression.py
+++ b/scripts/lib/resulttool/regression.py
@@ -7,15 +7,209 @@
 #
 
 import resulttool.resultutils as resultutils
-import json
 
 from oeqa.utils.git import GitRepo
 import oeqa.utils.gitarchive as gitarchive
 
-def compare_result(logger, base_name, target_name, base_result, target_result):
+METADATA_MATCH_TABLE = {
+    "oeselftest": "OESELFTEST_METADATA"
+}
+
+OESELFTEST_METADATA_GUESS_TABLE = {
+    "trigger-build-posttrigger": {
+        "run_all_tests": False,
+        "run_tests": ["buildoptions.SourceMirroring.test_yocto_source_mirror"],
+        "skips": None,
+        "machine": None,
+        "select_tags": None,
+        "exclude_tags": None
+    },
+    "reproducible": {
+        "run_all_tests": False,
+        "run_tests": ["reproducible"],
+        "skips": None,
+        "machine": None,
+        "select_tags": None,
+        "exclude_tags": None
+    },
+    "arch-qemu-quick": {
+        "run_all_tests": True,
+        "run_tests": None,
+        "skips": None,
+        "machine": None,
+        "select_tags": ["machine"],
+        "exclude_tags": None
+    },
+    "arch-qemu-full-x86-or-x86_64": {
+        "run_all_tests": True,
+        "run_tests": None,
+        "skips": None,
+        "machine": None,
+        "select_tags": ["machine", "toolchain-system"],
+        "exclude_tags": None
+    },
+    "arch-qemu-full-others": {
+        "run_all_tests": True,
+        "run_tests": None,
+        "skips": None,
+        "machine": None,
+        "select_tags": ["machine", "toolchain-user"],
+        "exclude_tags": None
+    },
+    "selftest": {
+        "run_all_tests": True,
+        "run_tests": None,
+        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
+        "machine": None,
+        "select_tags": None,
+        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
+    },
+    "bringup": {
+        "run_all_tests": True,
+        "run_tests": None,
+        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
+        "machine": None,
+        "select_tags": None,
+        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
+    }
+}
+
+STATUS_STRINGS = {
+    "None": "No matching test result"
+}
+
+REGRESSIONS_DISPLAY_LIMIT = 50
+
+MISSING_TESTS_BANNER = "-------------------------- Missing tests --------------------------"
+ADDITIONAL_DATA_BANNER = "--------------------- Matches and improvements --------------------"
+
+def test_has_at_least_one_matching_tag(test, tag_list):
+    return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])
+
+def all_tests_have_at_least_one_matching_tag(results, tag_list):
+    return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items())
+
+def any_test_have_any_matching_tag(results, tag_list):
+    return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values())
+
+def have_skipped_test(result, test_prefix):
+    return all(result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix))
+
+def have_all_tests_skipped(result, test_prefixes_list):
+    return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list)
+
+def guess_oeselftest_metadata(results):
+    """
+    When an oeselftest result lacks OESELFTEST_METADATA, we can try to guess it from the results content.
+    Check the results for specific values (absence/presence of oetags, number and names of executed tests...),
+    and if they match one of the known autobuilder configurations, apply the guessed OESELFTEST_METADATA
+    to allow proper test filtering.
+    This guessing process is tightly coupled to config.json in the autobuilder. It should trigger less and less,
+    as new tests will have OESELFTEST_METADATA properly appended at test reporting time.
+    """
+
+    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
+        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
+    elif all(result.startswith("reproducible") for result in results):
+        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
+    elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
+        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
+    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
+        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
+    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
+        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
+    elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
+        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
+            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
+        elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
+            return OESELFTEST_METADATA_GUESS_TABLE['bringup']
+
+    return None
+
+
+def metadata_matches(base_configuration, target_configuration):
+    """
+    For the passed base and target, check the test type. If the test type matches
+    one of the properties described in METADATA_MATCH_TABLE, compare the
+    corresponding metadata if it is present in base. Return True if the metadata
+    matches, or if base lacks some data (either TEST_TYPE or the corresponding metadata).
+    """
+    test_type = base_configuration.get('TEST_TYPE')
+    if test_type not in METADATA_MATCH_TABLE:
+        return True
+
+    metadata_key = METADATA_MATCH_TABLE.get(test_type)
+    if target_configuration.get(metadata_key) != base_configuration.get(metadata_key):
+        return False
+
+    return True
+
+
+def machine_matches(base_configuration, target_configuration):
+    return base_configuration.get('MACHINE') == target_configuration.get('MACHINE')
+
+
+def can_be_compared(logger, base, target):
+    """
+    Some test runs are not relevant to compare, for example oeselftest runs
+    with different test sets or parameters. Return True if the tests can be
+    compared.
+    """
+    ret = True
+    base_configuration = base['configuration']
+    target_configuration = target['configuration']
+
+    # Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on the test results.
+    if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration:
+        guess = guess_oeselftest_metadata(base['result'])
+        if guess is None:
+            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}")
+        else:
+            logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}")
+            base_configuration['OESELFTEST_METADATA'] = guess
+    if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration:
+        guess = guess_oeselftest_metadata(target['result'])
+        if guess is None:
+            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}")
+        else:
+            logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}")
+            target_configuration['OESELFTEST_METADATA'] = guess
+
+    # Test runs with LTP results in them should only be compared with other runs that also contain LTP tests
+    if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
+        ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])
+
+    return ret and metadata_matches(base_configuration, target_configuration) \
+        and machine_matches(base_configuration, target_configuration)
+
+def get_status_str(raw_status):
+    raw_status_lower = raw_status.lower() if raw_status else "None"
+    return STATUS_STRINGS.get(raw_status_lower, raw_status)
+
+def get_additional_info_line(new_pass_count, new_tests):
+    result = []
+    if new_tests:
+        result.append(f'+{new_tests} test(s) present')
+    if new_pass_count:
+        result.append(f'+{new_pass_count} test(s) now passing')
+
+    if not result:
+        return ""
+
+    return ' -> ' + ', '.join(result) + '\n'
+
+def compare_result(logger, base_name, target_name, base_result, target_result, display_limit=None):
     base_result = base_result.get('result')
     target_result = target_result.get('result')
     result = {}
+    new_tests = 0
+    regressions = {}
+    resultstring = ""
+    new_tests = 0
+    new_pass_count = 0
+
+    display_limit = int(display_limit) if display_limit else REGRESSIONS_DISPLAY_LIMIT
+
     if base_result and target_result:
         for k in base_result:
             base_testcase = base_result[k]
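The pairing helpers introduced above operate purely on the configuration dictionaries, so they are easy to exercise in isolation. A minimal sketch, assuming poky's scripts/lib and meta/lib directories are on PYTHONPATH; the configuration values below are hypothetical:

    from resulttool.regression import metadata_matches, machine_matches

    # Same TEST_TYPE and MACHINE, but diverging OESELFTEST_METADATA:
    # metadata_matches() should reject the pair, machine_matches() accept it.
    base = {
        "TEST_TYPE": "oeselftest",
        "MACHINE": "qemux86-64",
        "OESELFTEST_METADATA": {"run_all_tests": True, "select_tags": ["machine"]},
    }
    target = {
        "TEST_TYPE": "oeselftest",
        "MACHINE": "qemux86-64",
        "OESELFTEST_METADATA": {"run_all_tests": False, "run_tests": ["reproducible"]},
    }

    print(metadata_matches(base, target))   # False: OESELFTEST_METADATA differs
    print(machine_matches(base, target))    # True: same MACHINE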
@@ -27,12 +221,47 @@ def compare_result(logger, base_name, target_name, base_result, target_result):
                     result[k] = {'base': base_status, 'target': target_status}
             else:
                 logger.error('Failed to retrieved base test case status: %s' % k)
+
+        # Also count new tests that were not present in the base results: these
+        # could be newly added tests, but they could also highlight test renames
+        # or fixed faulty ptests
+        for k in target_result:
+            if k not in base_result:
+                new_tests += 1
     if result:
-        resultstring = "Regression: %s\n %s\n" % (base_name, target_name)
-        for k in sorted(result):
-            resultstring += ' %s: %s -> %s\n' % (k, result[k]['base'], result[k]['target'])
+        new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values())
+        # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
+        if new_pass_count < len(result):
+            resultstring = "Regression: %s\n %s\n" % (base_name, target_name)
+            for k in sorted(result):
+                if not result[k]['target'] or not result[k]['target'].startswith("PASS"):
+                    # Differentiate each ptest kind when listing regressions
+                    key_parts = k.split('.')
+                    key = '.'.join(key_parts[:2]) if k.startswith('ptest') else key_parts[0]
+                    # Append the new regression to the corresponding test family
+                    regressions[key] = regressions.setdefault(key, []) + [' %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))]
+            resultstring += f" Total: {sum([len(regressions[r]) for r in regressions])} new regression(s):\n"
+            for k in regressions:
+                resultstring += f" {len(regressions[k])} regression(s) for {k}\n"
+                count_to_print = min([display_limit, len(regressions[k])]) if display_limit > 0 else len(regressions[k])
+                resultstring += ''.join(regressions[k][:count_to_print])
+                if count_to_print < len(regressions[k]):
+                    resultstring += ' [...]\n'
+            if new_pass_count > 0:
+                resultstring += f' Additionally, {new_pass_count} previously failing test(s) is/are now passing\n'
+            if new_tests > 0:
+                resultstring += f' Additionally, {new_tests} new test(s) is/are present\n'
+        else:
+            resultstring = "%s\n%s\n" % (base_name, target_name)
+            result = None
     else:
-        resultstring = "Match: %s\n %s" % (base_name, target_name)
+        resultstring = "%s\n%s\n" % (base_name, target_name)
+
+    if not result:
+        additional_info = get_additional_info_line(new_pass_count, new_tests)
+        if additional_info:
+            resultstring += additional_info
+
     return result, resultstring
 
 def get_results(logger, source):
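The grouping key in this hunk is derived purely from the test name: ptest results keep their suite component so each suite is reported separately, while everything else collapses to its first dotted component. A standalone restatement of that one line, with illustrative test names:

    def family_key(test_name):
        # Mirrors the key derivation in compare_result() above
        key_parts = test_name.split('.')
        return '.'.join(key_parts[:2]) if test_name.startswith('ptest') else key_parts[0]

    print(family_key("ptestresult.glibc-tests.tst-cpuclock1"))
    # -> "ptestresult.glibc-tests": ptest regressions are bucketed per suite
    print(family_key("oescripts.OEListPackageconfigTests.test_packageconfig_flags"))
    # -> "oescripts": non-ptest results group under their first component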
@@ -44,12 +273,38 @@ def regression(args, logger):
 
     regression_common(args, logger, base_results, target_results)
 
+# Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
+# Truncating the test names works since they contain file and line number identifiers
+# which allow us to match them without the random components.
+def fixup_ptest_names(results, logger):
+    for r in results:
+        for i in results[r]:
+            tests = list(results[r][i]['result'].keys())
+            for test in tests:
+                new = None
+                if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
+                    new = test.split("_-_")[0]
+                elif test.startswith(("ptestresult.curl.")) and "__" in test:
+                    new = test.split("__")[0]
+                elif test.startswith(("ptestresult.dbus.")) and "__" in test:
+                    new = test.split("__")[0]
+                elif test.startswith("ptestresult.binutils") and "build-st-" in test:
+                    new = test.split(" ")[0]
+                elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
+                    new = ".".join(test.split(".")[:2])
+                if new:
+                    results[r][i]['result'][new] = results[r][i]['result'][test]
+                    del results[r][i]['result'][test]
+
 def regression_common(args, logger, base_results, target_results):
     if args.base_result_id:
         base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
     if args.target_result_id:
         target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)
 
+    fixup_ptest_names(base_results, logger)
+    fixup_ptest_names(target_results, logger)
+
     matches = []
     regressions = []
     notfound = []
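Each rule in fixup_ptest_names() keys on the separator that precedes the random, per-run part of a name, so renamed instances of the same test can still be matched across result sets. With hypothetical raw names (the suffixes are stand-ins for the random components):

    # lttng-tools/babeltrace rule: truncate at "_-_"
    print("ptestresult.lttng-tools.ust.before-after_-_out_ab12cd".split("_-_")[0])
    # -> "ptestresult.lttng-tools.ust.before-after"

    # curl/dbus rule: truncate at "__"
    print("ptestresult.curl.test_0042__run_3".split("__")[0])
    # -> "ptestresult.curl.test_0042"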
@@ -62,7 +317,9 @@ def regression_common(args, logger, base_results, target_results):
             # removing any pairs which match
             for c in base.copy():
                 for b in target.copy():
-                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
+                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
+                        continue
+                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                     if not res:
                         matches.append(resstr)
                         base.remove(c)
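can_be_compared() now gates every candidate pairing before any comparison work is done; for example, a runtime run containing LTP results is only paired with another LTP run. A sketch with hypothetical entries in the shape the loop passes in ({'configuration': ..., 'result': ...}):

    import logging
    from resulttool.regression import can_be_compared

    logger = logging.getLogger("resulttool")
    ltp_run = {
        "configuration": {"TEST_TYPE": "runtime", "MACHINE": "qemux86-64"},
        "result": {"ltpresult.math.abs01": {"status": "PASS"}},
    }
    plain_run = {
        "configuration": {"TEST_TYPE": "runtime", "MACHINE": "qemux86-64"},
        "result": {"ping.PingTest.test_ping": {"status": "PASS"}},
    }

    print(can_be_compared(logger, ltp_run, plain_run))  # False: target has no LTP results
    print(can_be_compared(logger, ltp_run, ltp_run))    # True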
@@ -71,15 +328,18 @@ def regression_common(args, logger, base_results, target_results):
             # Should only now see regressions, we may not be able to match multiple pairs directly
             for c in base:
                 for b in target:
-                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
+                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
+                        continue
+                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                     if res:
                         regressions.append(resstr)
         else:
             notfound.append("%s not found in target" % a)
-    print("\n".join(sorted(matches)))
     print("\n".join(sorted(regressions)))
+    print("\n" + MISSING_TESTS_BANNER + "\n")
     print("\n".join(sorted(notfound)))
+    print("\n" + ADDITIONAL_DATA_BANNER + "\n")
+    print("\n".join(sorted(matches)))
     return 0
 
 def regression_git(args, logger):
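With the reordered prints, regressions lead the report and matches are demoted below the two banners. Roughly, a run's output is now laid out as follows (an illustrative skeleton, not verbatim tool output):

    <regression reports, one per compared pair, limited per test family>

    -------------------------- Missing tests --------------------------

    <base configurations with no counterpart in the target results>

    --------------------- Matches and improvements --------------------

    <matching pairs, annotated by get_additional_info_line(), e.g.
     " -> +2 test(s) present, +5 test(s) now passing">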
@@ -183,4 +443,5 @@ def register_commands(subparsers):
     parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
     parser_build.add_argument('--commit2', help="Revision to compare with")
     parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
+    parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes")
 
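Judging by the surrounding --commit arguments, this parser is the regression-git subcommand; assuming so, a usage sketch with placeholder repository path and revisions:

    # Default: at most 50 (REGRESSIONS_DISPLAY_LIMIT) changes listed per test family
    resulttool regression-git /path/to/yocto-testresults --commit <base-rev> --commit2 <target-rev>

    # Set the limit to 0 to print every change with no truncation
    resulttool regression-git /path/to/yocto-testresults --commit <base-rev> --commit2 <target-rev> -l 0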