mirror of https://github.com/RGBCube/uutils-coreutils, synced 2025-07-27 11:07:44 +00:00
GNU/CI: use the aggregated-result.json files and move to python (#7471)
* GNU/CI: use the aggregated-result.json files and move to python

Co-authored-by: Daniel Hofstetter <daniel.hofstetter@42dh.com>

* simplify code

Co-authored-by: Daniel Hofstetter <daniel.hofstetter@42dh.com>

---------

Co-authored-by: Daniel Hofstetter <daniel.hofstetter@42dh.com>
parent c3098cce35
commit 38aee73fe5
2 changed files with 226 additions and 117 deletions
204 util/compare_test_results.py Normal file
@@ -0,0 +1,204 @@
#!/usr/bin/env python3
"""
Compare GNU test results between current run and reference to identify
regressions and fixes.

Arguments:
    CURRENT_JSON    Path to the current run's aggregated results JSON file
    REFERENCE_JSON  Path to the reference (main branch) aggregated
                    results JSON file
    --ignore-file   Path to file containing list of tests to ignore
                    (for intermittent issues)
    --output        Path to output file for GitHub comment content
"""

import argparse
import json
import os
import sys


def flatten_test_results(results):
    """Convert nested JSON test results to a flat dictionary of test paths to statuses."""
    flattened = {}
    for util, tests in results.items():
        for test_name, status in tests.items():
            test_path = f"{util}/{test_name}"
            flattened[test_path] = status
    return flattened


def load_ignore_list(ignore_file):
    """Load list of tests to ignore from file."""
    if not os.path.exists(ignore_file):
        return set()

    with open(ignore_file, "r") as f:
        return {line.strip() for line in f if line.strip() and not line.startswith("#")}


def identify_test_changes(current_flat, reference_flat):
    """
    Identify different categories of test changes between current and reference results.

    Args:
        current_flat (dict): Flattened dictionary of current test results
        reference_flat (dict): Flattened dictionary of reference test results

    Returns:
        tuple: Four lists containing regressions, fixes, newly_skipped, and newly_passing tests
    """
    # Find regressions (tests that were passing but now failing)
    regressions = []
    for test_path, status in current_flat.items():
        if status in ("FAIL", "ERROR"):
            if test_path in reference_flat:
                if reference_flat[test_path] in ("PASS", "SKIP"):
                    regressions.append(test_path)

    # Find fixes (tests that were failing but now passing)
    fixes = []
    for test_path, status in reference_flat.items():
        if status in ("FAIL", "ERROR"):
            if test_path in current_flat:
                if current_flat[test_path] == "PASS":
                    fixes.append(test_path)

    # Find newly skipped tests (were passing, now skipped)
    newly_skipped = []
    for test_path, status in current_flat.items():
        if (
            status == "SKIP"
            and test_path in reference_flat
            and reference_flat[test_path] == "PASS"
        ):
            newly_skipped.append(test_path)

    # Find newly passing tests (were skipped, now passing)
    newly_passing = []
    for test_path, status in current_flat.items():
        if (
            status == "PASS"
            and test_path in reference_flat
            and reference_flat[test_path] == "SKIP"
        ):
            newly_passing.append(test_path)

    return regressions, fixes, newly_skipped, newly_passing


def main():
    parser = argparse.ArgumentParser(
        description="Compare GNU test results and identify regressions and fixes"
    )
    parser.add_argument("current_json", help="Path to current run JSON results")
    parser.add_argument("reference_json", help="Path to reference JSON results")
    parser.add_argument(
        "--ignore-file",
        required=True,
        help="Path to file with tests to ignore (for intermittent issues)",
    )
    parser.add_argument("--output", help="Path to output file for GitHub comment")

    args = parser.parse_args()

    # Load test results
    try:
        with open(args.current_json, "r") as f:
            current_results = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        sys.stderr.write(f"Error loading current results: {e}\n")
        return 1

    try:
        with open(args.reference_json, "r") as f:
            reference_results = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        sys.stderr.write(f"Error loading reference results: {e}\n")
        sys.stderr.write("Skipping comparison as reference is not available.\n")
        return 0

    # Load ignore list (required)
    if not os.path.exists(args.ignore_file):
        sys.stderr.write(f"Error: Ignore file {args.ignore_file} does not exist\n")
        return 1

    ignore_list = load_ignore_list(args.ignore_file)
    print(f"Loaded {len(ignore_list)} tests to ignore from {args.ignore_file}")

    # Flatten result structures for easier comparison
    current_flat = flatten_test_results(current_results)
    reference_flat = flatten_test_results(reference_results)

    # Identify different categories of test changes
    regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
        current_flat, reference_flat
    )

    # Filter out intermittent issues from regressions
    real_regressions = [r for r in regressions if r not in ignore_list]
    intermittent_regressions = [r for r in regressions if r in ignore_list]

    # Print summary stats
    print(f"Total tests in current run: {len(current_flat)}")
    print(f"Total tests in reference: {len(reference_flat)}")
    print(f"New regressions: {len(real_regressions)}")
    print(f"Intermittent regressions: {len(intermittent_regressions)}")
    print(f"Fixed tests: {len(fixes)}")
    print(f"Newly skipped tests: {len(newly_skipped)}")
    print(f"Newly passing tests (previously skipped): {len(newly_passing)}")

    output_lines = []

    # Report regressions
    if real_regressions:
        print("\nREGRESSIONS (non-intermittent failures):", file=sys.stderr)
        for test in sorted(real_regressions):
            msg = f"GNU test failed: {test}. {test} is passing on 'main'. Maybe you have to rebase?"
            print(f"::error ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    # Report intermittent issues
    if intermittent_regressions:
        print("\nINTERMITTENT ISSUES (ignored):", file=sys.stderr)
        for test in sorted(intermittent_regressions):
            msg = f"Skip an intermittent issue {test} (fails in this run but passes in the 'main' branch)"
            print(f"::notice ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    # Report fixes
    if fixes:
        print("\nFIXED TESTS:", file=sys.stderr)
        for test in sorted(fixes):
            msg = f"Congrats! The gnu test {test} is no longer failing!"
            print(f"::notice ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    # Report newly skipped and passing tests
    if newly_skipped:
        print("\nNEWLY SKIPPED TESTS:", file=sys.stderr)
        for test in sorted(newly_skipped):
            msg = f"Note: The gnu test {test} is now being skipped but was previously passing."
            print(f"::warning ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    if newly_passing:
        print("\nNEWLY PASSING TESTS (previously skipped):", file=sys.stderr)
        for test in sorted(newly_passing):
            msg = f"Congrats! The gnu test {test} is now passing!"
            print(f"::notice ::{msg}", file=sys.stderr)
            output_lines.append(msg)

    if args.output and output_lines:
        with open(args.output, "w") as f:
            for line in output_lines:
                f.write(f"{line}\n")

    # Return exit code based on whether we found regressions
    return 1 if real_regressions else 0


if __name__ == "__main__":
    sys.exit(main())
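For illustration, below is a minimal local smoke test of the new script, not part of the commit. The input layout ({utility: {test_name: status}} with statuses PASS/FAIL/ERROR/SKIP) and the exit-code behavior are taken from the script itself; the sample utility and test names, the temporary file paths, and running via python3 from the repository root are assumptions made for the sketch.

#!/usr/bin/env python3
"""Hypothetical local smoke test for util/compare_test_results.py."""

import json
import subprocess
import tempfile
from pathlib import Path

# Shape the script expects: {utility: {test_name: status}} with statuses
# PASS / FAIL / ERROR / SKIP. The utility/test names below are made up.
reference = {"ls": {"test-a": "PASS", "test-b": "FAIL"}, "cp": {"test-c": "SKIP"}}
current = {"ls": {"test-a": "FAIL", "test-b": "PASS"}, "cp": {"test-c": "PASS"}}

with tempfile.TemporaryDirectory() as tmpdir:
    tmp = Path(tmpdir)
    (tmp / "current.json").write_text(json.dumps(current))
    (tmp / "reference.json").write_text(json.dumps(reference))
    (tmp / "ignore.txt").write_text("")  # --ignore-file is required, even if empty

    # Run from the repository root; a non-zero exit code signals a real regression.
    result = subprocess.run(
        [
            "python3",
            "util/compare_test_results.py",
            str(tmp / "current.json"),
            str(tmp / "reference.json"),
            "--ignore-file",
            str(tmp / "ignore.txt"),
            "--output",
            str(tmp / "comment.txt"),
        ],
        capture_output=True,
        text=True,
    )
    print(result.stdout)
    print(result.stderr)
    print("exit code:", result.returncode)  # expect 1: ls/test-a regressed vs. reference

In CI, the two JSON files would presumably be the current run's aggregated-result.json and the reference artifact from 'main'; the non-zero exit code on genuine regressions is what lets the workflow fail the job, while ::error/::notice/::warning lines surface as GitHub annotations.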