Diffstat (limited to 'scripts/lib/resulttool')
-rw-r--r--  scripts/lib/resulttool/junit.py            |  77
-rw-r--r--  scripts/lib/resulttool/log.py              |  13
-rwxr-xr-x  scripts/lib/resulttool/manualexecution.py  |   2
-rw-r--r--  scripts/lib/resulttool/regression.py       | 284
-rw-r--r--  scripts/lib/resulttool/report.py           |   7
-rw-r--r--  scripts/lib/resulttool/resultutils.py      |  84
-rw-r--r--  scripts/lib/resulttool/store.py            |  27
7 files changed, 456 insertions, 38 deletions
diff --git a/scripts/lib/resulttool/junit.py b/scripts/lib/resulttool/junit.py
new file mode 100644
index 0000000000..c7a53dc550
--- /dev/null
+++ b/scripts/lib/resulttool/junit.py
@@ -0,0 +1,77 @@ | |||
1 | # resulttool - report test results in JUnit XML format | ||
2 | # | ||
3 | # Copyright (c) 2024, Siemens AG. | ||
4 | # | ||
5 | # SPDX-License-Identifier: GPL-2.0-only | ||
6 | # | ||
7 | |||
8 | import os | ||
9 | import re | ||
10 | import xml.etree.ElementTree as ET | ||
11 | import resulttool.resultutils as resultutils | ||
12 | |||
13 | def junit(args, logger): | ||
14 | testresults = resultutils.load_resultsdata(args.json_file, configmap=resultutils.store_map) | ||
15 | |||
16 | total_time = 0 | ||
17 | skipped = 0 | ||
18 | failures = 0 | ||
19 | errors = 0 | ||
20 | |||
21 | for tests in testresults.values(): | ||
22 | results = tests[next(reversed(tests))].get("result", {}) | ||
23 | |||
24 | for result_id, result in results.items(): | ||
25 | # filter out ptestresult.rawlogs and ptestresult.sections | ||
26 | if re.search(r'\.test_', result_id): | ||
27 | total_time += result.get("duration", 0) | ||
28 | |||
29 | if result['status'] == "FAILED": | ||
30 | failures += 1 | ||
31 | elif result['status'] == "ERROR": | ||
32 | errors += 1 | ||
33 | elif result['status'] == "SKIPPED": | ||
34 | skipped += 1 | ||
35 | |||
36 | testsuites_node = ET.Element("testsuites") | ||
37 | testsuites_node.set("time", "%s" % total_time) | ||
38 | testsuite_node = ET.SubElement(testsuites_node, "testsuite") | ||
39 | testsuite_node.set("name", "Testimage") | ||
40 | testsuite_node.set("time", "%s" % total_time) | ||
41 | testsuite_node.set("tests", "%s" % len(results)) | ||
42 | testsuite_node.set("failures", "%s" % failures) | ||
43 | testsuite_node.set("errors", "%s" % errors) | ||
44 | testsuite_node.set("skipped", "%s" % skipped) | ||
45 | |||
46 | for result_id, result in results.items(): | ||
47 | if re.search(r'\.test_', result_id): | ||
48 | testcase_node = ET.SubElement(testsuite_node, "testcase", { | ||
49 | "name": result_id, | ||
50 | "classname": "Testimage", | ||
51 | "time": str(result['duration']) | ||
52 | }) | ||
53 | if result['status'] == "SKIPPED": | ||
54 | ET.SubElement(testcase_node, "skipped", message=result['log']) | ||
55 | elif result['status'] == "FAILED": | ||
56 | ET.SubElement(testcase_node, "failure", message=result['log']) | ||
57 | elif result['status'] == "ERROR": | ||
58 | ET.SubElement(testcase_node, "error", message=result['log']) | ||
59 | |||
60 | tree = ET.ElementTree(testsuites_node) | ||
61 | |||
62 | if args.junit_xml_path is None: | ||
63 | args.junit_xml_path = os.environ['BUILDDIR'] + '/tmp/log/oeqa/junit.xml' | ||
64 | tree.write(args.junit_xml_path, encoding='UTF-8', xml_declaration=True) | ||
65 | |||
66 | logger.info('Saved JUnit XML report as %s' % args.junit_xml_path) | ||
67 | |||
68 | def register_commands(subparsers): | ||
69 | """Register subcommands from this plugin""" | ||
70 | parser_build = subparsers.add_parser('junit', help='create test report in JUnit XML format', | ||
71 | description='generate unit test report in JUnit XML format based on the latest test results in the testresults.json.', | ||
72 | group='analysis') | ||
73 | parser_build.set_defaults(func=junit) | ||
74 | parser_build.add_argument('json_file', | ||
75 | help='json file should point to the testresults.json') | ||
76 | parser_build.add_argument('-j', '--junit_xml_path', | ||
77 | help='junit xml path allows setting the path of the generated test report. The default location is <build_dir>/tmp/log/oeqa/junit.xml') | ||
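The junit plugin above is essentially a thin walk over the latest testresults.json entry feeding xml.etree.ElementTree. As a reference point, here is a minimal standalone sketch of that ElementTree pattern, using hypothetical result entries rather than resulttool's internal structures:

    import xml.etree.ElementTree as ET

    # Hypothetical result entries shaped like the ".test_" items the plugin keeps
    results = {
        "selftest.example.test_ok":  {"status": "PASSED", "duration": 0.5},
        "selftest.example.test_bad": {"status": "FAILED", "duration": 1.2, "log": "assertion failed"},
    }

    testsuites = ET.Element("testsuites")
    suite = ET.SubElement(testsuites, "testsuite", name="Testimage",
                          tests=str(len(results)),
                          failures=str(sum(r["status"] == "FAILED" for r in results.values())))
    for name, r in results.items():
        case = ET.SubElement(suite, "testcase", name=name, classname="Testimage",
                             time=str(r["duration"]))
        if r["status"] == "FAILED":
            ET.SubElement(case, "failure", message=r.get("log", ""))

    # xml_declaration=True gives the same '<?xml ...?>' header the plugin writes
    ET.ElementTree(testsuites).write("junit.xml", encoding="UTF-8", xml_declaration=True)

Invocation should then be along the lines of "resulttool junit path/to/testresults.json -j junit.xml", given the argument parser registered above.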
diff --git a/scripts/lib/resulttool/log.py b/scripts/lib/resulttool/log.py
index eb3927ec82..15148ca288 100644
--- a/scripts/lib/resulttool/log.py
+++ b/scripts/lib/resulttool/log.py
@@ -28,12 +28,10 @@ def show_reproducible(result, reproducible, logger): | |||
28 | def log(args, logger): | 28 | def log(args, logger): |
29 | results = resultutils.load_resultsdata(args.source) | 29 | results = resultutils.load_resultsdata(args.source) |
30 | 30 | ||
31 | ptest_count = sum(1 for _, _, _, r in resultutils.test_run_results(results) if 'ptestresult.sections' in r) | ||
32 | if ptest_count > 1 and not args.prepend_run: | ||
33 | print("%i ptest sections found. '--prepend-run' is required" % ptest_count) | ||
34 | return 1 | ||
35 | |||
36 | for _, run_name, _, r in resultutils.test_run_results(results): | 31 | for _, run_name, _, r in resultutils.test_run_results(results): |
32 | if args.list_ptest: | ||
33 | print('\n'.join(sorted(r['ptestresult.sections'].keys()))) | ||
34 | |||
37 | if args.dump_ptest: | 35 | if args.dump_ptest: |
38 | for sectname in ['ptestresult.sections', 'ltpposixresult.sections', 'ltpresult.sections']: | 36 | for sectname in ['ptestresult.sections', 'ltpposixresult.sections', 'ltpresult.sections']: |
39 | if sectname in r: | 37 | if sectname in r: |
@@ -48,6 +46,9 @@ def log(args, logger): | |||
48 | 46 | ||
49 | os.makedirs(dest_dir, exist_ok=True) | 47 | os.makedirs(dest_dir, exist_ok=True) |
50 | dest = os.path.join(dest_dir, '%s.log' % name) | 48 | dest = os.path.join(dest_dir, '%s.log' % name) |
49 | if os.path.exists(dest): | ||
50 | print("Overlapping ptest logs found, skipping %s. The '--prepend-run' option would avoid this" % name) | ||
51 | continue | ||
51 | print(dest) | 52 | print(dest) |
52 | with open(dest, 'w') as f: | 53 | with open(dest, 'w') as f: |
53 | f.write(logdata) | 54 | f.write(logdata) |
@@ -86,6 +87,8 @@ def register_commands(subparsers): | |||
86 | parser.set_defaults(func=log) | 87 | parser.set_defaults(func=log) |
87 | parser.add_argument('source', | 88 | parser.add_argument('source', |
88 | help='the results file/directory/URL to import') | 89 | help='the results file/directory/URL to import') |
90 | parser.add_argument('--list-ptest', action='store_true', | ||
91 | help='list the ptest test names') | ||
89 | parser.add_argument('--ptest', action='append', default=[], | 92 | parser.add_argument('--ptest', action='append', default=[], |
90 | help='show logs for a ptest') | 93 | help='show logs for a ptest') |
91 | parser.add_argument('--dump-ptest', metavar='DIR', | 94 | parser.add_argument('--dump-ptest', metavar='DIR', |
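Instead of refusing to dump anything when several ptest sections are present, the log command now skips individual collisions and suggests --prepend-run, and the new --list-ptest option prints the available section names (for example: resulttool log path/to/testresults.json --list-ptest). A small sketch of the per-file skip behaviour, written as a hypothetical helper rather than the resulttool code itself:

    import os

    def dump_ptest_logs(sections, dest_dir):
        # Write one <name>.log per ptest section, skipping any name already written
        os.makedirs(dest_dir, exist_ok=True)
        for name, data in sections.items():
            dest = os.path.join(dest_dir, '%s.log' % name)
            if os.path.exists(dest):
                print("Overlapping ptest logs found, skipping %s" % name)
                continue
            with open(dest, 'w') as f:
                f.write(data.get('log', ''))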
diff --git a/scripts/lib/resulttool/manualexecution.py b/scripts/lib/resulttool/manualexecution.py
index ecb27c5933..ae0861ac6b 100755
--- a/scripts/lib/resulttool/manualexecution.py
+++ b/scripts/lib/resulttool/manualexecution.py
@@ -22,7 +22,7 @@ def load_json_file(f): | |||
22 | def write_json_file(f, json_data): | 22 | def write_json_file(f, json_data): |
23 | os.makedirs(os.path.dirname(f), exist_ok=True) | 23 | os.makedirs(os.path.dirname(f), exist_ok=True) |
24 | with open(f, 'w') as filedata: | 24 | with open(f, 'w') as filedata: |
25 | filedata.write(json.dumps(json_data, sort_keys=True, indent=4)) | 25 | filedata.write(json.dumps(json_data, sort_keys=True, indent=1)) |
26 | 26 | ||
27 | class ManualTestRunner(object): | 27 | class ManualTestRunner(object): |
28 | 28 | ||
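The only functional change here is json.dumps moving from indent=4 to indent=1, which keeps the stored manual results line-oriented (so still diff-friendly) while making the files noticeably smaller. For example:

    import json

    data = {"result": {"manual.example.case": {"status": "PASSED"}}}
    # One space per nesting level instead of four; keys still sorted, one per line
    print(json.dumps(data, sort_keys=True, indent=1))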
diff --git a/scripts/lib/resulttool/regression.py b/scripts/lib/resulttool/regression.py
index 9f952951b3..33b3119c54 100644
--- a/scripts/lib/resulttool/regression.py
+++ b/scripts/lib/resulttool/regression.py
@@ -7,17 +7,213 @@ | |||
7 | # | 7 | # |
8 | 8 | ||
9 | import resulttool.resultutils as resultutils | 9 | import resulttool.resultutils as resultutils |
10 | import json | ||
11 | 10 | ||
12 | from oeqa.utils.git import GitRepo | 11 | from oeqa.utils.git import GitRepo |
13 | import oeqa.utils.gitarchive as gitarchive | 12 | import oeqa.utils.gitarchive as gitarchive |
14 | 13 | ||
15 | def compare_result(logger, base_name, target_name, base_result, target_result): | 14 | METADATA_MATCH_TABLE = { |
15 | "oeselftest": "OESELFTEST_METADATA" | ||
16 | } | ||
17 | |||
18 | OESELFTEST_METADATA_GUESS_TABLE={ | ||
19 | "trigger-build-posttrigger": { | ||
20 | "run_all_tests": False, | ||
21 | "run_tests":["buildoptions.SourceMirroring.test_yocto_source_mirror"], | ||
22 | "skips": None, | ||
23 | "machine": None, | ||
24 | "select_tags":None, | ||
25 | "exclude_tags": None | ||
26 | }, | ||
27 | "reproducible": { | ||
28 | "run_all_tests": False, | ||
29 | "run_tests":["reproducible"], | ||
30 | "skips": None, | ||
31 | "machine": None, | ||
32 | "select_tags":None, | ||
33 | "exclude_tags": None | ||
34 | }, | ||
35 | "arch-qemu-quick": { | ||
36 | "run_all_tests": True, | ||
37 | "run_tests":None, | ||
38 | "skips": None, | ||
39 | "machine": None, | ||
40 | "select_tags":["machine"], | ||
41 | "exclude_tags": None | ||
42 | }, | ||
43 | "arch-qemu-full-x86-or-x86_64": { | ||
44 | "run_all_tests": True, | ||
45 | "run_tests":None, | ||
46 | "skips": None, | ||
47 | "machine": None, | ||
48 | "select_tags":["machine", "toolchain-system"], | ||
49 | "exclude_tags": None | ||
50 | }, | ||
51 | "arch-qemu-full-others": { | ||
52 | "run_all_tests": True, | ||
53 | "run_tests":None, | ||
54 | "skips": None, | ||
55 | "machine": None, | ||
56 | "select_tags":["machine", "toolchain-user"], | ||
57 | "exclude_tags": None | ||
58 | }, | ||
59 | "selftest": { | ||
60 | "run_all_tests": True, | ||
61 | "run_tests":None, | ||
62 | "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"], | ||
63 | "machine": None, | ||
64 | "select_tags":None, | ||
65 | "exclude_tags": ["machine", "toolchain-system", "toolchain-user"] | ||
66 | }, | ||
67 | "bringup": { | ||
68 | "run_all_tests": True, | ||
69 | "run_tests":None, | ||
70 | "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"], | ||
71 | "machine": None, | ||
72 | "select_tags":None, | ||
73 | "exclude_tags": ["machine", "toolchain-system", "toolchain-user"] | ||
74 | } | ||
75 | } | ||
76 | |||
77 | STATUS_STRINGS = { | ||
78 | "None": "No matching test result" | ||
79 | } | ||
80 | |||
81 | REGRESSIONS_DISPLAY_LIMIT=50 | ||
82 | |||
83 | MISSING_TESTS_BANNER = "-------------------------- Missing tests --------------------------" | ||
84 | ADDITIONAL_DATA_BANNER = "--------------------- Matches and improvements --------------------" | ||
85 | |||
86 | def test_has_at_least_one_matching_tag(test, tag_list): | ||
87 | return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"]) | ||
88 | |||
89 | def all_tests_have_at_least_one_matching_tag(results, tag_list): | ||
90 | return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items()) | ||
91 | |||
92 | def any_test_have_any_matching_tag(results, tag_list): | ||
93 | return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values()) | ||
94 | |||
95 | def have_skipped_test(result, test_prefix): | ||
96 | return all( result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix)) | ||
97 | |||
98 | def have_all_tests_skipped(result, test_prefixes_list): | ||
99 | return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list) | ||
100 | |||
101 | def guess_oeselftest_metadata(results): | ||
102 | """ | ||
103 | When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content. | ||
104 | Check results for specific values (absence/presence of oetags, number and name of executed tests...), | ||
105 | and if it matches one of known configuration from autobuilder configuration, apply guessed OSELFTEST_METADATA | ||
106 | to it to allow proper test filtering. | ||
107 | This guessing process is tightly coupled to config.json in autobuilder. It should trigger less and less, | ||
108 | as new tests will have OESELFTEST_METADATA properly appended at test reporting time | ||
109 | """ | ||
110 | |||
111 | if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results: | ||
112 | return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger'] | ||
113 | elif all(result.startswith("reproducible") for result in results): | ||
114 | return OESELFTEST_METADATA_GUESS_TABLE['reproducible'] | ||
115 | elif all_tests_have_at_least_one_matching_tag(results, ["machine"]): | ||
116 | return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick'] | ||
117 | elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]): | ||
118 | return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64'] | ||
119 | elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]): | ||
120 | return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others'] | ||
121 | elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]): | ||
122 | if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]): | ||
123 | return OESELFTEST_METADATA_GUESS_TABLE['selftest'] | ||
124 | elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]): | ||
125 | return OESELFTEST_METADATA_GUESS_TABLE['bringup'] | ||
126 | |||
127 | return None | ||
128 | |||
129 | |||
130 | def metadata_matches(base_configuration, target_configuration): | ||
131 | """ | ||
132 | For passed base and target, check test type. If test type matches one of | ||
133 | properties described in METADATA_MATCH_TABLE, compare metadata if it is | ||
134 | present in base. Return true if metadata matches, or if base lacks some | ||
135 | data (either TEST_TYPE or the corresponding metadata) | ||
136 | """ | ||
137 | test_type = base_configuration.get('TEST_TYPE') | ||
138 | if test_type not in METADATA_MATCH_TABLE: | ||
139 | return True | ||
140 | |||
141 | metadata_key = METADATA_MATCH_TABLE.get(test_type) | ||
142 | if target_configuration.get(metadata_key) != base_configuration.get(metadata_key): | ||
143 | return False | ||
144 | |||
145 | return True | ||
146 | |||
147 | |||
148 | def machine_matches(base_configuration, target_configuration): | ||
149 | return base_configuration.get('MACHINE') == target_configuration.get('MACHINE') | ||
150 | |||
151 | |||
152 | def can_be_compared(logger, base, target): | ||
153 | """ | ||
154 | Some tests are not relevant to be compared, for example some oeselftest | ||
155 | run with different tests sets or parameters. Return true if tests can be | ||
156 | compared | ||
157 | """ | ||
158 | ret = True | ||
159 | base_configuration = base['configuration'] | ||
160 | target_configuration = target['configuration'] | ||
161 | |||
162 | # Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on tests results. | ||
163 | if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration: | ||
164 | guess = guess_oeselftest_metadata(base['result']) | ||
165 | if guess is None: | ||
166 | logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}") | ||
167 | else: | ||
168 | logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}") | ||
169 | base_configuration['OESELFTEST_METADATA'] = guess | ||
170 | if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration: | ||
171 | guess = guess_oeselftest_metadata(target['result']) | ||
172 | if guess is None: | ||
173 | logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}") | ||
174 | else: | ||
175 | logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}") | ||
176 | target_configuration['OESELFTEST_METADATA'] = guess | ||
177 | |||
178 | # Test runs with LTP results in should only be compared with other runs with LTP tests in them | ||
179 | if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']): | ||
180 | ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result']) | ||
181 | |||
182 | return ret and metadata_matches(base_configuration, target_configuration) \ | ||
183 | and machine_matches(base_configuration, target_configuration) | ||
184 | |||
185 | def get_status_str(raw_status): | ||
186 | raw_status_lower = raw_status.lower() if raw_status else "None" | ||
187 | return STATUS_STRINGS.get(raw_status_lower, raw_status) | ||
188 | |||
189 | def get_additional_info_line(new_pass_count, new_tests): | ||
190 | result=[] | ||
191 | if new_tests: | ||
192 | result.append(f'+{new_tests} test(s) present') | ||
193 | if new_pass_count: | ||
194 | result.append(f'+{new_pass_count} test(s) now passing') | ||
195 | |||
196 | if not result: | ||
197 | return "" | ||
198 | |||
199 | return ' -> ' + ', '.join(result) + '\n' | ||
200 | |||
201 | def compare_result(logger, base_name, target_name, base_result, target_result, display_limit=None): | ||
16 | base_result = base_result.get('result') | 202 | base_result = base_result.get('result') |
17 | target_result = target_result.get('result') | 203 | target_result = target_result.get('result') |
18 | result = {} | 204 | result = {} |
205 | new_tests = 0 | ||
206 | regressions = {} | ||
207 | resultstring = "" | ||
208 | new_tests = 0 | ||
209 | new_pass_count = 0 | ||
210 | |||
211 | display_limit = int(display_limit) if display_limit else REGRESSIONS_DISPLAY_LIMIT | ||
212 | |||
19 | if base_result and target_result: | 213 | if base_result and target_result: |
20 | for k in base_result: | 214 | for k in base_result: |
215 | if k in ['ptestresult.rawlogs', 'ptestresult.sections']: | ||
216 | continue | ||
21 | base_testcase = base_result[k] | 217 | base_testcase = base_result[k] |
22 | base_status = base_testcase.get('status') | 218 | base_status = base_testcase.get('status') |
23 | if base_status: | 219 | if base_status: |
@@ -27,12 +223,47 @@ def compare_result(logger, base_name, target_name, base_result, target_result): | |||
27 | result[k] = {'base': base_status, 'target': target_status} | 223 | result[k] = {'base': base_status, 'target': target_status} |
28 | else: | 224 | else: |
29 | logger.error('Failed to retrieved base test case status: %s' % k) | 225 | logger.error('Failed to retrieved base test case status: %s' % k) |
226 | |||
227 | # Also count new tests that were not present in base results: it | ||
228 | # could be newly added tests, but it could also highlights some tests | ||
229 | # renames or fixed faulty ptests | ||
230 | for k in target_result: | ||
231 | if k not in base_result: | ||
232 | new_tests += 1 | ||
30 | if result: | 233 | if result: |
31 | resultstring = "Regression: %s\n %s\n" % (base_name, target_name) | 234 | new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values()) |
32 | for k in sorted(result): | 235 | # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...) |
33 | resultstring += ' %s: %s -> %s\n' % (k, result[k]['base'], result[k]['target']) | 236 | if new_pass_count < len(result): |
237 | resultstring = "Regression: %s\n %s\n" % (base_name, target_name) | ||
238 | for k in sorted(result): | ||
239 | if not result[k]['target'] or not result[k]['target'].startswith("PASS"): | ||
240 | # Differentiate each ptest kind when listing regressions | ||
241 | key_parts = k.split('.') | ||
242 | key = '.'.join(key_parts[:2]) if k.startswith('ptest') else key_parts[0] | ||
243 | # Append new regression to corresponding test family | ||
244 | regressions[key] = regressions.setdefault(key, []) + [' %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))] | ||
245 | resultstring += f" Total: {sum([len(regressions[r]) for r in regressions])} new regression(s):\n" | ||
246 | for k in regressions: | ||
247 | resultstring += f" {len(regressions[k])} regression(s) for {k}\n" | ||
248 | count_to_print=min([display_limit, len(regressions[k])]) if display_limit > 0 else len(regressions[k]) | ||
249 | resultstring += ''.join(regressions[k][:count_to_print]) | ||
250 | if count_to_print < len(regressions[k]): | ||
251 | resultstring+=' [...]\n' | ||
252 | if new_pass_count > 0: | ||
253 | resultstring += f' Additionally, {new_pass_count} previously failing test(s) is/are now passing\n' | ||
254 | if new_tests > 0: | ||
255 | resultstring += f' Additionally, {new_tests} new test(s) is/are present\n' | ||
256 | else: | ||
257 | resultstring = "%s\n%s\n" % (base_name, target_name) | ||
258 | result = None | ||
34 | else: | 259 | else: |
35 | resultstring = "Match: %s\n %s" % (base_name, target_name) | 260 | resultstring = "%s\n%s\n" % (base_name, target_name) |
261 | |||
262 | if not result: | ||
263 | additional_info = get_additional_info_line(new_pass_count, new_tests) | ||
264 | if additional_info: | ||
265 | resultstring += additional_info | ||
266 | |||
36 | return result, resultstring | 267 | return result, resultstring |
37 | 268 | ||
38 | def get_results(logger, source): | 269 | def get_results(logger, source): |
@@ -44,12 +275,38 @@ def regression(args, logger): | |||
44 | 275 | ||
45 | regression_common(args, logger, base_results, target_results) | 276 | regression_common(args, logger, base_results, target_results) |
46 | 277 | ||
278 | # Some test case naming is poor and contains random strings, particularly lttng/babeltrace. | ||
279 | # Truncating the test names works since they contain file and line number identifiers | ||
280 | # which allows us to match them without the random components. | ||
281 | def fixup_ptest_names(results, logger): | ||
282 | for r in results: | ||
283 | for i in results[r]: | ||
284 | tests = list(results[r][i]['result'].keys()) | ||
285 | for test in tests: | ||
286 | new = None | ||
287 | if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test: | ||
288 | new = test.split("_-_")[0] | ||
289 | elif test.startswith(("ptestresult.curl.")) and "__" in test: | ||
290 | new = test.split("__")[0] | ||
291 | elif test.startswith(("ptestresult.dbus.")) and "__" in test: | ||
292 | new = test.split("__")[0] | ||
293 | elif test.startswith("ptestresult.binutils") and "build-st-" in test: | ||
294 | new = test.split(" ")[0] | ||
295 | elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test: | ||
296 | new = ".".join(test.split(".")[:2]) | ||
297 | if new: | ||
298 | results[r][i]['result'][new] = results[r][i]['result'][test] | ||
299 | del results[r][i]['result'][test] | ||
300 | |||
47 | def regression_common(args, logger, base_results, target_results): | 301 | def regression_common(args, logger, base_results, target_results): |
48 | if args.base_result_id: | 302 | if args.base_result_id: |
49 | base_results = resultutils.filter_resultsdata(base_results, args.base_result_id) | 303 | base_results = resultutils.filter_resultsdata(base_results, args.base_result_id) |
50 | if args.target_result_id: | 304 | if args.target_result_id: |
51 | target_results = resultutils.filter_resultsdata(target_results, args.target_result_id) | 305 | target_results = resultutils.filter_resultsdata(target_results, args.target_result_id) |
52 | 306 | ||
307 | fixup_ptest_names(base_results, logger) | ||
308 | fixup_ptest_names(target_results, logger) | ||
309 | |||
53 | matches = [] | 310 | matches = [] |
54 | regressions = [] | 311 | regressions = [] |
55 | notfound = [] | 312 | notfound = [] |
@@ -62,7 +319,9 @@ def regression_common(args, logger, base_results, target_results): | |||
62 | # removing any pairs which match | 319 | # removing any pairs which match |
63 | for c in base.copy(): | 320 | for c in base.copy(): |
64 | for b in target.copy(): | 321 | for b in target.copy(): |
65 | res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b]) | 322 | if not can_be_compared(logger, base_results[a][c], target_results[a][b]): |
323 | continue | ||
324 | res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit) | ||
66 | if not res: | 325 | if not res: |
67 | matches.append(resstr) | 326 | matches.append(resstr) |
68 | base.remove(c) | 327 | base.remove(c) |
@@ -71,15 +330,18 @@ def regression_common(args, logger, base_results, target_results): | |||
71 | # Should only now see regressions, we may not be able to match multiple pairs directly | 330 | # Should only now see regressions, we may not be able to match multiple pairs directly |
72 | for c in base: | 331 | for c in base: |
73 | for b in target: | 332 | for b in target: |
74 | res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b]) | 333 | if not can_be_compared(logger, base_results[a][c], target_results[a][b]): |
334 | continue | ||
335 | res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit) | ||
75 | if res: | 336 | if res: |
76 | regressions.append(resstr) | 337 | regressions.append(resstr) |
77 | else: | 338 | else: |
78 | notfound.append("%s not found in target" % a) | 339 | notfound.append("%s not found in target" % a) |
79 | print("\n".join(sorted(matches))) | ||
80 | print("\n".join(sorted(regressions))) | 340 | print("\n".join(sorted(regressions))) |
341 | print("\n" + MISSING_TESTS_BANNER + "\n") | ||
81 | print("\n".join(sorted(notfound))) | 342 | print("\n".join(sorted(notfound))) |
82 | 343 | print("\n" + ADDITIONAL_DATA_BANNER + "\n") | |
344 | print("\n".join(sorted(matches))) | ||
83 | return 0 | 345 | return 0 |
84 | 346 | ||
85 | def regression_git(args, logger): | 347 | def regression_git(args, logger): |
@@ -162,6 +424,7 @@ def register_commands(subparsers): | |||
162 | help='(optional) filter the base results to this result ID') | 424 | help='(optional) filter the base results to this result ID') |
163 | parser_build.add_argument('-t', '--target-result-id', default='', | 425 | parser_build.add_argument('-t', '--target-result-id', default='', |
164 | help='(optional) filter the target results to this result ID') | 426 | help='(optional) filter the target results to this result ID') |
427 | parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes") | ||
165 | 428 | ||
166 | parser_build = subparsers.add_parser('regression-git', help='regression git analysis', | 429 | parser_build = subparsers.add_parser('regression-git', help='regression git analysis', |
167 | description='regression analysis comparing base result set to target ' | 430 | description='regression analysis comparing base result set to target ' |
@@ -183,4 +446,5 @@ def register_commands(subparsers): | |||
183 | parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified") | 446 | parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified") |
184 | parser_build.add_argument('--commit2', help="Revision to compare with") | 447 | parser_build.add_argument('--commit2', help="Revision to compare with") |
185 | parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified") | 448 | parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified") |
449 | parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes") | ||
186 | 450 | ||
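Much of the comparison gating above comes down to metadata_matches(): runs whose TEST_TYPE appears in METADATA_MATCH_TABLE are only compared when the corresponding metadata block is identical. A condensed sketch of that check, reduced to plain dictionaries for illustration:

    METADATA_MATCH_TABLE = {"oeselftest": "OESELFTEST_METADATA"}

    def metadata_matches(base_configuration, target_configuration):
        # Only test types listed in the table carry metadata worth gating on
        key = METADATA_MATCH_TABLE.get(base_configuration.get('TEST_TYPE'))
        return key is None or base_configuration.get(key) == target_configuration.get(key)

    base   = {'TEST_TYPE': 'oeselftest', 'OESELFTEST_METADATA': {'run_all_tests': True}}
    target = {'TEST_TYPE': 'oeselftest', 'OESELFTEST_METADATA': {'run_all_tests': False}}
    assert not metadata_matches(base, target)   # differently configured selftest runs are not compared

The new -l/--limit option caps how many regressions are printed per test family (default 50, 0 for unlimited).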
diff --git a/scripts/lib/resulttool/report.py b/scripts/lib/resulttool/report.py
index f0ca50ebe2..1c100b00ab 100644
--- a/scripts/lib/resulttool/report.py
+++ b/scripts/lib/resulttool/report.py
@@ -176,7 +176,10 @@ class ResultsTextReport(object): | |||
176 | vals['sort'] = line['testseries'] + "_" + line['result_id'] | 176 | vals['sort'] = line['testseries'] + "_" + line['result_id'] |
177 | vals['failed_testcases'] = line['failed_testcases'] | 177 | vals['failed_testcases'] = line['failed_testcases'] |
178 | for k in cols: | 178 | for k in cols: |
179 | vals[k] = "%d (%s%%)" % (line[k], format(line[k] / total_tested * 100, '.0f')) | 179 | if total_tested: |
180 | vals[k] = "%d (%s%%)" % (line[k], format(line[k] / total_tested * 100, '.0f')) | ||
181 | else: | ||
182 | vals[k] = "0 (0%)" | ||
180 | for k in maxlen: | 183 | for k in maxlen: |
181 | if k in vals and len(vals[k]) > maxlen[k]: | 184 | if k in vals and len(vals[k]) > maxlen[k]: |
182 | maxlen[k] = len(vals[k]) | 185 | maxlen[k] = len(vals[k]) |
@@ -253,7 +256,7 @@ class ResultsTextReport(object): | |||
253 | if selected_test_case_only: | 256 | if selected_test_case_only: |
254 | print_selected_testcase_result(raw_results, selected_test_case_only) | 257 | print_selected_testcase_result(raw_results, selected_test_case_only) |
255 | else: | 258 | else: |
256 | print(json.dumps(raw_results, sort_keys=True, indent=4)) | 259 | print(json.dumps(raw_results, sort_keys=True, indent=1)) |
257 | else: | 260 | else: |
258 | print('Could not find raw test result for %s' % raw_test) | 261 | print('Could not find raw test result for %s' % raw_test) |
259 | return 0 | 262 | return 0 |
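The report change simply guards the percentage column against a zero total_tested, which would otherwise raise ZeroDivisionError on an empty result set. The guarded formatting boils down to something like:

    def format_count(count, total_tested):
        # "N (P%)", falling back to "0 (0%)" when nothing was tested at all
        if total_tested:
            return "%d (%s%%)" % (count, format(count / total_tested * 100, '.0f'))
        return "0 (0%)"

The raw-results dump also switches to indent=1, matching the other json.dumps changes in this diff.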
diff --git a/scripts/lib/resulttool/resultutils.py b/scripts/lib/resulttool/resultutils.py
index 8917022d36..b8fc79a6ac 100644
--- a/scripts/lib/resulttool/resultutils.py
+++ b/scripts/lib/resulttool/resultutils.py
@@ -14,8 +14,11 @@ import scriptpath | |||
14 | import copy | 14 | import copy |
15 | import urllib.request | 15 | import urllib.request |
16 | import posixpath | 16 | import posixpath |
17 | import logging | ||
17 | scriptpath.add_oe_lib_path() | 18 | scriptpath.add_oe_lib_path() |
18 | 19 | ||
20 | logger = logging.getLogger('resulttool') | ||
21 | |||
19 | flatten_map = { | 22 | flatten_map = { |
20 | "oeselftest": [], | 23 | "oeselftest": [], |
21 | "runtime": [], | 24 | "runtime": [], |
@@ -31,13 +34,19 @@ regression_map = { | |||
31 | "manual": ['TEST_TYPE', 'TEST_MODULE', 'IMAGE_BASENAME', 'MACHINE'] | 34 | "manual": ['TEST_TYPE', 'TEST_MODULE', 'IMAGE_BASENAME', 'MACHINE'] |
32 | } | 35 | } |
33 | store_map = { | 36 | store_map = { |
34 | "oeselftest": ['TEST_TYPE'], | 37 | "oeselftest": ['TEST_TYPE', 'TESTSERIES', 'MACHINE'], |
35 | "runtime": ['TEST_TYPE', 'DISTRO', 'MACHINE', 'IMAGE_BASENAME'], | 38 | "runtime": ['TEST_TYPE', 'DISTRO', 'MACHINE', 'IMAGE_BASENAME'], |
36 | "sdk": ['TEST_TYPE', 'MACHINE', 'SDKMACHINE', 'IMAGE_BASENAME'], | 39 | "sdk": ['TEST_TYPE', 'MACHINE', 'SDKMACHINE', 'IMAGE_BASENAME'], |
37 | "sdkext": ['TEST_TYPE', 'MACHINE', 'SDKMACHINE', 'IMAGE_BASENAME'], | 40 | "sdkext": ['TEST_TYPE', 'MACHINE', 'SDKMACHINE', 'IMAGE_BASENAME'], |
38 | "manual": ['TEST_TYPE', 'TEST_MODULE', 'MACHINE', 'IMAGE_BASENAME'] | 41 | "manual": ['TEST_TYPE', 'TEST_MODULE', 'MACHINE', 'IMAGE_BASENAME'] |
39 | } | 42 | } |
40 | 43 | ||
44 | rawlog_sections = { | ||
45 | "ptestresult.rawlogs": "ptest", | ||
46 | "ltpresult.rawlogs": "ltp", | ||
47 | "ltpposixresult.rawlogs": "ltpposix" | ||
48 | } | ||
49 | |||
41 | def is_url(p): | 50 | def is_url(p): |
42 | """ | 51 | """ |
43 | Helper for determining if the given path is a URL | 52 | Helper for determining if the given path is a URL |
@@ -58,7 +67,11 @@ def append_resultsdata(results, f, configmap=store_map, configvars=extra_configv | |||
58 | testseries = posixpath.basename(posixpath.dirname(url.path)) | 67 | testseries = posixpath.basename(posixpath.dirname(url.path)) |
59 | else: | 68 | else: |
60 | with open(f, "r") as filedata: | 69 | with open(f, "r") as filedata: |
61 | data = json.load(filedata) | 70 | try: |
71 | data = json.load(filedata) | ||
72 | except json.decoder.JSONDecodeError: | ||
73 | print("Cannot decode {}. Possible corruption. Skipping.".format(f)) | ||
74 | data = "" | ||
62 | testseries = os.path.basename(os.path.dirname(f)) | 75 | testseries = os.path.basename(os.path.dirname(f)) |
63 | else: | 76 | else: |
64 | data = f | 77 | data = f |
@@ -104,21 +117,57 @@ def filter_resultsdata(results, resultid): | |||
104 | newresults[r][i] = results[r][i] | 117 | newresults[r][i] = results[r][i] |
105 | return newresults | 118 | return newresults |
106 | 119 | ||
107 | def strip_ptestresults(results): | 120 | def strip_logs(results): |
108 | newresults = copy.deepcopy(results) | 121 | newresults = copy.deepcopy(results) |
109 | #for a in newresults2: | ||
110 | # newresults = newresults2[a] | ||
111 | for res in newresults: | 122 | for res in newresults: |
112 | if 'result' not in newresults[res]: | 123 | if 'result' not in newresults[res]: |
113 | continue | 124 | continue |
114 | if 'ptestresult.rawlogs' in newresults[res]['result']: | 125 | for logtype in rawlog_sections: |
115 | del newresults[res]['result']['ptestresult.rawlogs'] | 126 | if logtype in newresults[res]['result']: |
127 | del newresults[res]['result'][logtype] | ||
116 | if 'ptestresult.sections' in newresults[res]['result']: | 128 | if 'ptestresult.sections' in newresults[res]['result']: |
117 | for i in newresults[res]['result']['ptestresult.sections']: | 129 | for i in newresults[res]['result']['ptestresult.sections']: |
118 | if 'log' in newresults[res]['result']['ptestresult.sections'][i]: | 130 | if 'log' in newresults[res]['result']['ptestresult.sections'][i]: |
119 | del newresults[res]['result']['ptestresult.sections'][i]['log'] | 131 | del newresults[res]['result']['ptestresult.sections'][i]['log'] |
120 | return newresults | 132 | return newresults |
121 | 133 | ||
134 | # For timing numbers, crazy amounts of precision don't make sense and just confuse | ||
135 | # the logs. For numbers over 1, trim to 3 decimal places, for numbers less than 1, | ||
136 | # trim to 4 significant digits | ||
137 | def trim_durations(results): | ||
138 | for res in results: | ||
139 | if 'result' not in results[res]: | ||
140 | continue | ||
141 | for entry in results[res]['result']: | ||
142 | if 'duration' in results[res]['result'][entry]: | ||
143 | duration = results[res]['result'][entry]['duration'] | ||
144 | if duration > 1: | ||
145 | results[res]['result'][entry]['duration'] = float("%.3f" % duration) | ||
146 | elif duration < 1: | ||
147 | results[res]['result'][entry]['duration'] = float("%.4g" % duration) | ||
148 | return results | ||
149 | |||
150 | def handle_cleanups(results): | ||
151 | # Remove pointless path duplication from old format reproducibility results | ||
152 | for res2 in results: | ||
153 | try: | ||
154 | section = results[res2]['result']['reproducible']['files'] | ||
155 | for pkgtype in section: | ||
156 | for filelist in section[pkgtype].copy(): | ||
157 | if section[pkgtype][filelist] and type(section[pkgtype][filelist][0]) == dict: | ||
158 | newlist = [] | ||
159 | for entry in section[pkgtype][filelist]: | ||
160 | newlist.append(entry["reference"].split("/./")[1]) | ||
161 | section[pkgtype][filelist] = newlist | ||
162 | |||
163 | except KeyError: | ||
164 | pass | ||
165 | # Remove pointless duplicate rawlogs data | ||
166 | try: | ||
167 | del results[res2]['result']['reproducible.rawlogs'] | ||
168 | except KeyError: | ||
169 | pass | ||
170 | |||
122 | def decode_log(logdata): | 171 | def decode_log(logdata): |
123 | if isinstance(logdata, str): | 172 | if isinstance(logdata, str): |
124 | return logdata | 173 | return logdata |
@@ -142,7 +191,7 @@ def generic_get_log(sectionname, results, section): | |||
142 | return decode_log(ptest['log']) | 191 | return decode_log(ptest['log']) |
143 | 192 | ||
144 | def ptestresult_get_log(results, section): | 193 | def ptestresult_get_log(results, section): |
145 | return generic_get_log('ptestresuls.sections', results, section) | 194 | return generic_get_log('ptestresult.sections', results, section) |
146 | 195 | ||
147 | def generic_get_rawlogs(sectname, results): | 196 | def generic_get_rawlogs(sectname, results): |
148 | if sectname not in results: | 197 | if sectname not in results: |
@@ -151,9 +200,6 @@ def generic_get_rawlogs(sectname, results): | |||
151 | return None | 200 | return None |
152 | return decode_log(results[sectname]['log']) | 201 | return decode_log(results[sectname]['log']) |
153 | 202 | ||
154 | def ptestresult_get_rawlogs(results): | ||
155 | return generic_get_rawlogs('ptestresult.rawlogs', results) | ||
156 | |||
157 | def save_resultsdata(results, destdir, fn="testresults.json", ptestjson=False, ptestlogs=False): | 203 | def save_resultsdata(results, destdir, fn="testresults.json", ptestjson=False, ptestlogs=False): |
158 | for res in results: | 204 | for res in results: |
159 | if res: | 205 | if res: |
@@ -163,16 +209,20 @@ def save_resultsdata(results, destdir, fn="testresults.json", ptestjson=False, p | |||
163 | os.makedirs(os.path.dirname(dst), exist_ok=True) | 209 | os.makedirs(os.path.dirname(dst), exist_ok=True) |
164 | resultsout = results[res] | 210 | resultsout = results[res] |
165 | if not ptestjson: | 211 | if not ptestjson: |
166 | resultsout = strip_ptestresults(results[res]) | 212 | resultsout = strip_logs(results[res]) |
213 | trim_durations(resultsout) | ||
214 | handle_cleanups(resultsout) | ||
167 | with open(dst, 'w') as f: | 215 | with open(dst, 'w') as f: |
168 | f.write(json.dumps(resultsout, sort_keys=True, indent=4)) | 216 | f.write(json.dumps(resultsout, sort_keys=True, indent=1)) |
169 | for res2 in results[res]: | 217 | for res2 in results[res]: |
170 | if ptestlogs and 'result' in results[res][res2]: | 218 | if ptestlogs and 'result' in results[res][res2]: |
171 | seriesresults = results[res][res2]['result'] | 219 | seriesresults = results[res][res2]['result'] |
172 | rawlogs = ptestresult_get_rawlogs(seriesresults) | 220 | for logtype in rawlog_sections: |
173 | if rawlogs is not None: | 221 | logdata = generic_get_rawlogs(logtype, seriesresults) |
174 | with open(dst.replace(fn, "ptest-raw.log"), "w+") as f: | 222 | if logdata is not None: |
175 | f.write(rawlogs) | 223 | logger.info("Extracting " + rawlog_sections[logtype] + "-raw.log") |
224 | with open(dst.replace(fn, rawlog_sections[logtype] + "-raw.log"), "w+") as f: | ||
225 | f.write(logdata) | ||
176 | if 'ptestresult.sections' in seriesresults: | 226 | if 'ptestresult.sections' in seriesresults: |
177 | for i in seriesresults['ptestresult.sections']: | 227 | for i in seriesresults['ptestresult.sections']: |
178 | sectionlog = ptestresult_get_log(seriesresults, i) | 228 | sectionlog = ptestresult_get_log(seriesresults, i) |
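trim_durations() applies two different rounding rules that are easy to misread in the diff: durations above one second keep three decimal places, durations below one second keep four significant digits, and a value of exactly 1 is left untouched. A sketch with assumed example values:

    def trim(duration):
        # Mirrors the trimming rules introduced in trim_durations() above
        if duration > 1:
            return float("%.3f" % duration)
        elif duration < 1:
            return float("%.4g" % duration)
        return duration

    print(trim(12.3456789))    # 12.346     (three decimal places)
    print(trim(0.000123456))   # 0.0001235  (four significant digits)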
diff --git a/scripts/lib/resulttool/store.py b/scripts/lib/resulttool/store.py
index e0951f0a8f..b143334e69 100644
--- a/scripts/lib/resulttool/store.py
+++ b/scripts/lib/resulttool/store.py
@@ -65,18 +65,35 @@ def store(args, logger): | |||
65 | 65 | ||
66 | for r in revisions: | 66 | for r in revisions: |
67 | results = revisions[r] | 67 | results = revisions[r] |
68 | if args.revision and r[0] != args.revision: | ||
69 | logger.info('skipping %s as non-matching' % r[0]) | ||
70 | continue | ||
68 | keywords = {'commit': r[0], 'branch': r[1], "commit_count": r[2]} | 71 | keywords = {'commit': r[0], 'branch': r[1], "commit_count": r[2]} |
69 | subprocess.check_call(["find", tempdir, "!", "-path", "./.git/*", "-delete"]) | 72 | subprocess.check_call(["find", tempdir, "-name", "testresults.json", "!", "-path", "./.git/*", "-delete"]) |
70 | resultutils.save_resultsdata(results, tempdir, ptestlogs=True) | 73 | resultutils.save_resultsdata(results, tempdir, ptestlogs=True) |
71 | 74 | ||
72 | logger.info('Storing test result into git repository %s' % args.git_dir) | 75 | logger.info('Storing test result into git repository %s' % args.git_dir) |
73 | 76 | ||
74 | gitarchive.gitarchive(tempdir, args.git_dir, False, False, | 77 | excludes = [] |
78 | if args.logfile_archive: | ||
79 | excludes = ['*.log', "*.log.zst"] | ||
80 | |||
81 | tagname = gitarchive.gitarchive(tempdir, args.git_dir, False, False, | ||
75 | "Results of {branch}:{commit}", "branch: {branch}\ncommit: {commit}", "{branch}", | 82 | "Results of {branch}:{commit}", "branch: {branch}\ncommit: {commit}", "{branch}", |
76 | False, "{branch}/{commit_count}-g{commit}/{tag_number}", | 83 | False, "{branch}/{commit_count}-g{commit}/{tag_number}", |
77 | 'Test run #{tag_number} of {branch}:{commit}', '', | 84 | 'Test run #{tag_number} of {branch}:{commit}', '', |
78 | [], [], False, keywords, logger) | 85 | excludes, [], False, keywords, logger) |
79 | 86 | ||
87 | if args.logfile_archive: | ||
88 | logdir = args.logfile_archive + "/" + tagname | ||
89 | shutil.copytree(tempdir, logdir) | ||
90 | os.chmod(logdir, 0o755) | ||
91 | for root, dirs, files in os.walk(logdir): | ||
92 | for name in files: | ||
93 | if not name.endswith(".log"): | ||
94 | continue | ||
95 | f = os.path.join(root, name) | ||
96 | subprocess.run(["zstd", f, "--rm"], check=True, capture_output=True) | ||
80 | finally: | 97 | finally: |
81 | subprocess.check_call(["rm", "-rf", tempdir]) | 98 | subprocess.check_call(["rm", "-rf", tempdir]) |
82 | 99 | ||
@@ -102,3 +119,7 @@ def register_commands(subparsers): | |||
102 | help='add executed-by configuration to each result file') | 119 | help='add executed-by configuration to each result file') |
103 | parser_build.add_argument('-t', '--extra-test-env', default='', | 120 | parser_build.add_argument('-t', '--extra-test-env', default='', |
104 | help='add extra test environment data to each result file configuration') | 121 | help='add extra test environment data to each result file configuration') |
122 | parser_build.add_argument('-r', '--revision', default='', | ||
123 | help='only store data for the specified revision') | ||
124 | parser_build.add_argument('-l', '--logfile-archive', default='', | ||
125 | help='directory to separately archive log files along with a copy of the results') | ||
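With the new -l/--logfile-archive option, the stored results are copied out of the temporary work tree (the *.log / *.log.zst files are excluded from the git archive itself) and every .log is then compressed in place with zstd --rm; -r/--revision restricts storing to a single matching revision. A standalone sketch of the compression pass, assuming the zstd CLI is on PATH:

    import os
    import subprocess

    def compress_logs(logdir):
        # Compress each .log in place; --rm removes the original once zstd succeeds
        for root, dirs, files in os.walk(logdir):
            for name in files:
                if name.endswith(".log"):
                    subprocess.run(["zstd", os.path.join(root, name), "--rm"],
                                   check=True, capture_output=True)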