diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml
index 2860c0a77..e87805573 100644
--- a/.github/workflows/GnuTests.yml
+++ b/.github/workflows/GnuTests.yml
@@ -349,137 +349,42 @@ jobs:
       - name: Compare test failures VS reference
         shell: bash
         run: |
-          ## Compare test failures VS reference
-          have_new_failures=""
-          REF_LOG_FILE='${{ steps.vars.outputs.path_reference }}/test-logs/test-suite.log'
-          ROOT_REF_LOG_FILE='${{ steps.vars.outputs.path_reference }}/test-logs/test-suite-root.log'
-          SELINUX_REF_LOG_FILE='${{ steps.vars.outputs.path_reference }}/test-logs/selinux-test-suite.log'
-          SELINUX_ROOT_REF_LOG_FILE='${{ steps.vars.outputs.path_reference }}/test-logs/selinux-test-suite-root.log'
-          REF_SUMMARY_FILE='${{ steps.vars.outputs.path_reference }}/test-summary/gnu-result.json'
-
-
+          ## Compare test failures VS reference using JSON files
+          REF_SUMMARY_FILE='${{ steps.vars.outputs.path_reference }}/aggregated-result/aggregated-result.json'
+          CURRENT_SUMMARY_FILE='${{ steps.vars.outputs.AGGREGATED_SUMMARY_FILE }}'
           REPO_DEFAULT_BRANCH='${{ steps.vars.outputs.repo_default_branch }}'
           path_UUTILS='${{ steps.vars.outputs.path_UUTILS }}'
-          # https://github.com/uutils/coreutils/issues/4294
-          # https://github.com/uutils/coreutils/issues/4295
+
+          # Path to ignore file for intermittent issues
           IGNORE_INTERMITTENT="${path_UUTILS}/.github/workflows/ignore-intermittent.txt"
-          mkdir -p ${{ steps.vars.outputs.path_reference }}
-
+          # Set up comment directory
           COMMENT_DIR="${{ steps.vars.outputs.path_reference }}/comment"
           mkdir -p ${COMMENT_DIR}
           echo ${{ github.event.number }} > ${COMMENT_DIR}/NR
           COMMENT_LOG="${COMMENT_DIR}/result.txt"
-          # The comment log might be downloaded from a previous run
-          # We only want the new changes, so remove it if it exists.
-          rm -f ${COMMENT_LOG}
-          touch ${COMMENT_LOG}
+          COMPARISON_RESULT=0
+          if test -f "${CURRENT_SUMMARY_FILE}"; then
+            if test -f "${REF_SUMMARY_FILE}"; then
+              echo "Reference summary SHA1/ID: $(sha1sum -- "${REF_SUMMARY_FILE}")"
+              echo "Current summary SHA1/ID: $(sha1sum -- "${CURRENT_SUMMARY_FILE}")"
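+              # compare_test_results.py prints GitHub ::error/::notice
+              # annotations, appends the matching plain-text lines to the
+              # --output file for the PR comment, and exits with 1 only
+              # when it finds new non-intermittent failures.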
- echo "::error ::$MSG" - echo $MSG >> ${COMMENT_LOG} - have_new_failures="true" - else - MSG="Skip an intermittent issue ${LINE} (fails in this run but passes in the 'main' branch)" - echo "::notice ::$MSG" - echo $MSG >> ${COMMENT_LOG} - echo "" - fi - fi - done - - for LINE in ${REF_FAILING} - do - if ! grep -Fxq ${LINE}<<<"${CURRENT_RUN_FAILING}" - then - if ! grep ${LINE} ${IGNORE_INTERMITTENT} - then - MSG="Congrats! The gnu test ${LINE} is no longer failing!" - echo "::notice ::$MSG" - echo $MSG >> ${COMMENT_LOG} - else - MSG="Skipping an intermittent issue ${LINE} (passes in this run but fails in the 'main' branch)" - echo "::notice ::$MSG" - echo $MSG >> ${COMMENT_LOG} - echo "" - fi - fi - done - - for LINE in ${CURRENT_RUN_ERROR} - do - if ! grep -Fxq ${LINE}<<<"${REF_ERROR}" - then - MSG="GNU test error: ${LINE}. ${LINE} is passing on '${REPO_DEFAULT_BRANCH}'. Maybe you have to rebase?" - echo "::error ::$MSG" - echo $MSG >> ${COMMENT_LOG} - have_new_failures="true" - fi - done - - for LINE in ${REF_ERROR} - do - if ! grep -Fxq ${LINE}<<<"${CURRENT_RUN_ERROR}" - then - MSG="Congrats! The gnu test ${LINE} is no longer ERROR! (might be PASS or FAIL)" - echo "::warning ::$MSG" - echo $MSG >> ${COMMENT_LOG} - fi - done - - for LINE in ${REF_SKIP} - do - if ! grep -Fxq ${LINE}<<<"${CURRENT_RUN_SKIP}" - then - MSG="Congrats! The gnu test ${LINE} is no longer SKIP! (might be PASS, ERROR or FAIL)" - echo "::warning ::$MSG" - echo $MSG >> ${COMMENT_LOG} - fi - done + python3 ${path_UUTILS}/util/compare_test_results.py \ + --ignore-file "${IGNORE_INTERMITTENT}" \ + --output "${COMMENT_LOG}" \ + "${CURRENT_SUMMARY_FILE}" "${REF_SUMMARY_FILE}" + COMPARISON_RESULT=$? else - echo "::warning ::Skipping ${test_type} test failure comparison; no prior reference test logs are available." + echo "::warning ::Skipping test comparison; no prior reference summary is available at '${REF_SUMMARY_FILE}'." fi - } + else + echo "::error ::Failed to find summary of test results (missing '${CURRENT_SUMMARY_FILE}'); failing early" + exit 1 + fi - # Compare standard tests - compare_tests '${{ steps.vars.outputs.path_GNU_tests }}/test-suite.log' "${REF_LOG_FILE}" "standard" - - # Compare root tests - compare_tests '${{ steps.vars.outputs.path_GNU_tests }}/test-suite-root.log' "${ROOT_REF_LOG_FILE}" "root" - - # Set environment variable to indicate whether all failures are intermittent - if [ -n "${have_new_failures}" ]; then + if [ ${COMPARISON_RESULT} -eq 1 ]; then echo "ONLY_INTERMITTENT=false" >> $GITHUB_ENV echo "::error ::Found new non-intermittent test failures" exit 1 diff --git a/util/compare_test_results.py b/util/compare_test_results.py new file mode 100644 index 000000000..273d2a2ff --- /dev/null +++ b/util/compare_test_results.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python3 +""" +Compare GNU test results between current run and reference to identify +regressions and fixes. 
+
+import argparse
+import json
+import os
+import sys
+
+
+def flatten_test_results(results):
+    """Convert nested JSON test results to a flat dictionary of test paths to statuses."""
+    flattened = {}
+    for util, tests in results.items():
+        for test_name, status in tests.items():
+            test_path = f"{util}/{test_name}"
+            flattened[test_path] = status
+    return flattened
+
+
+def load_ignore_list(ignore_file):
+    """Load list of tests to ignore from file."""
+    if not os.path.exists(ignore_file):
+        return set()
+
+    with open(ignore_file, "r") as f:
+        # Strip before the "#" check so indented comment lines are skipped too.
+        stripped = (line.strip() for line in f)
+        return {line for line in stripped if line and not line.startswith("#")}
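+
+
+# The ignore file is expected to list one test per line, in the same
+# "util/test_name" form that flatten_test_results() produces; blank lines
+# and "#" comments are skipped. Illustrative example:
+#
+#   # flaky under heavy CI load
+#   tail/inotify-dir-recreate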
available.\n") + return 0 + + # Load ignore list (required) + if not os.path.exists(args.ignore_file): + sys.stderr.write(f"Error: Ignore file {args.ignore_file} does not exist\n") + return 1 + + ignore_list = load_ignore_list(args.ignore_file) + print(f"Loaded {len(ignore_list)} tests to ignore from {args.ignore_file}") + + # Flatten result structures for easier comparison + current_flat = flatten_test_results(current_results) + reference_flat = flatten_test_results(reference_results) + + # Identify different categories of test changes + regressions, fixes, newly_skipped, newly_passing = identify_test_changes( + current_flat, reference_flat + ) + + # Filter out intermittent issues from regressions + real_regressions = [r for r in regressions if r not in ignore_list] + intermittent_regressions = [r for r in regressions if r in ignore_list] + + # Print summary stats + print(f"Total tests in current run: {len(current_flat)}") + print(f"Total tests in reference: {len(reference_flat)}") + print(f"New regressions: {len(real_regressions)}") + print(f"Intermittent regressions: {len(intermittent_regressions)}") + print(f"Fixed tests: {len(fixes)}") + print(f"Newly skipped tests: {len(newly_skipped)}") + print(f"Newly passing tests (previously skipped): {len(newly_passing)}") + + output_lines = [] + + # Report regressions + if real_regressions: + print("\nREGRESSIONS (non-intermittent failures):", file=sys.stderr) + for test in sorted(real_regressions): + msg = f"GNU test failed: {test}. {test} is passing on 'main'. Maybe you have to rebase?" + print(f"::error ::{msg}", file=sys.stderr) + output_lines.append(msg) + + # Report intermittent issues + if intermittent_regressions: + print("\nINTERMITTENT ISSUES (ignored):", file=sys.stderr) + for test in sorted(intermittent_regressions): + msg = f"Skip an intermittent issue {test} (fails in this run but passes in the 'main' branch)" + print(f"::notice ::{msg}", file=sys.stderr) + output_lines.append(msg) + + # Report fixes + if fixes: + print("\nFIXED TESTS:", file=sys.stderr) + for test in sorted(fixes): + msg = f"Congrats! The gnu test {test} is no longer failing!" + print(f"::notice ::{msg}", file=sys.stderr) + output_lines.append(msg) + + # Report newly skipped and passing tests + if newly_skipped: + print("\nNEWLY SKIPPED TESTS:", file=sys.stderr) + for test in sorted(newly_skipped): + msg = f"Note: The gnu test {test} is now being skipped but was previously passing." + print(f"::warning ::{msg}", file=sys.stderr) + output_lines.append(msg) + + if newly_passing: + print("\nNEWLY PASSING TESTS (previously skipped):", file=sys.stderr) + for test in sorted(newly_passing): + msg = f"Congrats! The gnu test {test} is now passing!" + print(f"::notice ::{msg}", file=sys.stderr) + output_lines.append(msg) + + if args.output and output_lines: + with open(args.output, "w") as f: + for line in output_lines: + f.write(f"{line}\n") + + # Return exit code based on whether we found regressions + return 1 if real_regressions else 0 + + +if __name__ == "__main__": + sys.exit(main())