# resulttool - regression analysis
#
# Copyright (c) 2019, Intel Corporation.
# Copyright (c) 2019, Linux Foundation
#
# SPDX-License-Identifier: GPL-2.0-only
#

import resulttool.resultutils as resultutils

from oeqa.utils.git import GitRepo
import oeqa.utils.gitarchive as gitarchive

METADATA_MATCH_TABLE = {
    "oeselftest": "OESELFTEST_METADATA"
}

OESELFTEST_METADATA_GUESS_TABLE = {
    "trigger-build-posttrigger": {
        "run_all_tests": False,
        "run_tests": ["buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None
    },
    "reproducible": {
        "run_all_tests": False,
        "run_tests": ["reproducible"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None
    },
    "arch-qemu-quick": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine"],
        "exclude_tags": None
    },
    "arch-qemu-full-x86-or-x86_64": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-system"],
        "exclude_tags": None
    },
    "arch-qemu-full-others": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-user"],
        "exclude_tags": None
    },
    "selftest": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    },
    "bringup": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    }
}

def test_has_at_least_one_matching_tag(test, tag_list):
    return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])

def all_tests_have_at_least_one_matching_tag(results, tag_list):
    return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult")
               for (test_name, test_result) in results.items())

def any_test_have_any_matching_tag(results, tag_list):
    return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values())

def have_skipped_test(result, test_prefix):
    return all(result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix))

def have_all_tests_skipped(result, test_prefixes_list):
    return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list)

def guess_oeselftest_metadata(results):
    """
    When an oeselftest test result is lacking OESELFTEST_METADATA, try to guess it from the results content.
    Check the results for specific values (absence/presence of oetags, number and names of executed tests...)
    and, if they match one of the known configurations from the autobuilder configuration, apply the guessed
    OESELFTEST_METADATA to allow proper test filtering.
    This guessing process is tightly coupled to config.json in the autobuilder.
    It should be needed less and less often, as new test results have OESELFTEST_METADATA properly
    appended at test reporting time.
    """
    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
    elif all(result.startswith("reproducible") for result in results):
        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
    elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
        elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
            return OESELFTEST_METADATA_GUESS_TABLE['bringup']
    return None

def metadata_matches(base_configuration, target_configuration):
    """
    For the passed base and target configurations, check the test type. If the test type matches one of
    the entries described in METADATA_MATCH_TABLE, compare the corresponding metadata if it is present
    in the base. Return True if the metadata matches, or if the base lacks some data (either TEST_TYPE
    or the corresponding metadata).
    """
    test_type = base_configuration.get('TEST_TYPE')
    if test_type not in METADATA_MATCH_TABLE:
        return True

    metadata_key = METADATA_MATCH_TABLE.get(test_type)
    if target_configuration.get(metadata_key) != base_configuration.get(metadata_key):
        return False

    return True

def machine_matches(base_configuration, target_configuration):
    return base_configuration.get('MACHINE') == target_configuration.get('MACHINE')

def can_be_compared(logger, base, target):
    """
    Some test runs are not relevant to compare, for example oeselftest runs using different test sets
    or parameters. Return True if the tests can be compared.
    """
    ret = True
    base_configuration = base['configuration']
    target_configuration = target['configuration']

    # Older test results lack proper OESELFTEST_METADATA: if it is not present, try to guess it from the test results.
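    # For illustration, a guessed metadata entry has the same shape as the values in
    # OESELFTEST_METADATA_GUESS_TABLE above. The concrete values below are only an example
    # (the 'arch-qemu-quick' entry), not data read from a real results file:
    #
    #   {"run_all_tests": True, "run_tests": None, "skips": None,
    #    "machine": None, "select_tags": ["machine"], "exclude_tags": None}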
    if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration:
        guess = guess_oeselftest_metadata(base['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}")
            base_configuration['OESELFTEST_METADATA'] = guess
    if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration:
        guess = guess_oeselftest_metadata(target['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}")
            target_configuration['OESELFTEST_METADATA'] = guess

    # Test runs containing LTP results should only be compared with other runs that also contain LTP tests
    if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
        ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])

    return ret and metadata_matches(base_configuration, target_configuration) \
        and machine_matches(base_configuration, target_configuration)

def compare_result(logger, base_name, target_name, base_result, target_result):
    base_result = base_result.get('result')
    target_result = target_result.get('result')
    result = {}
    if base_result and target_result:
        for k in base_result:
            base_testcase = base_result[k]
            base_status = base_testcase.get('status')
            if base_status:
                target_testcase = target_result.get(k, {})
                target_status = target_testcase.get('status')
                if base_status != target_status:
                    result[k] = {'base': base_status, 'target': target_status}
            else:
                logger.error('Failed to retrieve base test case status: %s' % k)
    if result:
        resultstring = "Regression: %s\n            %s\n" % (base_name, target_name)
        for k in sorted(result):
            resultstring += '    %s: %s -> %s\n' % (k, result[k]['base'], result[k]['target'])
    else:
        resultstring = "Match: %s\n       %s" % (base_name, target_name)
    return result, resultstring

def get_results(logger, source):
    return resultutils.load_resultsdata(source, configmap=resultutils.regression_map)

def regression(args, logger):
    base_results = get_results(logger, args.base_result)
    target_results = get_results(logger, args.target_result)
    regression_common(args, logger, base_results, target_results)

def regression_common(args, logger, base_results, target_results):
    if args.base_result_id:
        base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
    if args.target_result_id:
        target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

    matches = []
    regressions = []
    notfound = []

    for a in base_results:
        if a in target_results:
            base = list(base_results[a].keys())
            target = list(target_results[a].keys())
            # We may have multiple base/targets which are for different configurations. Start by
            # removing any pairs which match.
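            # Illustrative walk-through (configuration names are hypothetical): with base
            # configurations {cfg1, cfg2} and target configurations {cfg1, cfg3}, the cfg1/cfg1
            # pair with identical statuses is recorded as a match and removed from both lists;
            # the remaining cfg2 is then compared against cfg3 in the second loop below and any
            # status differences are reported as regressions.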
            for c in base.copy():
                for b in target.copy():
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
                    if not res:
                        matches.append(resstr)
                        base.remove(c)
                        target.remove(b)
                        break
            # We should now only see regressions; we may not be able to match multiple pairs directly
            for c in base:
                for b in target:
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
                    if res:
                        regressions.append(resstr)
        else:
            notfound.append("%s not found in target" % a)
    print("\n".join(sorted(matches)))
    print("\n".join(sorted(regressions)))
    print("\n".join(sorted(notfound)))
    return 0

# Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
# Truncating the test names works since they contain file and line number identifiers
# which allow us to match them without the random components.
def fixup_ptest_names(results, logger):
    for r in results:
        for i in results[r]:
            tests = list(results[r][i]['result'].keys())
            for test in tests:
                new = None
                if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
                    new = test.split("_-_")[0]
                elif test.startswith(("ptestresult.curl.")) and "__" in test:
                    new = test.split("__")[0]
                if new:
                    results[r][i]['result'][new] = results[r][i]['result'][test]
                    del results[r][i]['result'][test]

def regression_git(args, logger):
    base_results = {}
    target_results = {}

    tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
    repo = GitRepo(args.repo)

    revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

    if args.branch2:
        revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
        if not len(revs2):
            logger.error("No revisions found to compare against")
            return 1
        if not len(revs):
            logger.error("No revision to report on found")
            return 1
    else:
        if len(revs) < 2:
            logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            logger.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.branch2:
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            logger.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparison.
            # In the future we could check that the commit is a common ancestor and
            # continue back if not, but this is good enough for now.
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            logger.error("Unable to determine the other commit, use "
                         "--commit2 or --commit-number2 to specify it")
            return 1

    logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

    base_results = resultutils.git_get_result(repo, revs[index1][2])
    target_results = resultutils.git_get_result(repo, revs[index2][2])

    fixup_ptest_names(base_results, logger)
    fixup_ptest_names(target_results, logger)
    regression_common(args, logger, base_results, target_results)

    return 0

def register_commands(subparsers):
    """Register subcommands from this plugin"""
    parser_build = subparsers.add_parser('regression', help='regression file/directory analysis',
                                         description='regression analysis comparing the base set of results to the target results',
                                         group='analysis')
    parser_build.set_defaults(func=regression)
    parser_build.add_argument('base_result',
                              help='base result file/directory/URL for the comparison')
    parser_build.add_argument('target_result',
                              help='target result file/directory/URL to compare with')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) filter the base results to this result ID')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) filter the target results to this result ID')

    parser_build = subparsers.add_parser('regression-git', help='regression git analysis',
                                         description='regression analysis comparing the base result set to the target result set',
                                         group='analysis')
    parser_build.set_defaults(func=regression_git)
    parser_build.add_argument('repo',
                              help='the git repository containing the data')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) select regressions based on configurations unless a base result '
                                   'id is provided')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) select regressions based on configurations unless a target result '
                                   'id is provided')

    parser_build.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    parser_build.add_argument('--branch2', help="Branch to find comparison revisions in")
    parser_build.add_argument('--commit', help="Revision to search for")
    parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
    parser_build.add_argument('--commit2', help="Revision to compare with")
    parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
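
# For reference, the subcommands registered above are typically run through the resulttool
# command-line wrapper. The invocations below are illustrative sketches only; the paths and
# revision placeholders are not taken from a real setup:
#
#   resulttool regression base-results/ target-results/
#   resulttool regression-git /path/to/testresults-repo
#   resulttool regression-git --commit <base-sha> --commit2 <target-sha> /path/to/testresults-repo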