Add gcov-based test pruning with coverage cache #4997
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: display name, triggers, and run-concurrency policy.
name: 'Test Suite'

# Triggers: pushes to master, the listed pull-request activity types, and
# manual dispatch.
on:
  push:
    branches:
      - master
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - ready_for_review
  workflow_dispatch:

# One active run per workflow + ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
# Job: lint-gate. Formatting, spelling, toolchain, source, and docs lint checks.
  lint-gate:
    name: Lint Gate
    runs-on: ubuntu-latest
    steps:
      - name: Clone
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Initialize MFC
        run: ./mfc.sh init

      # Runs the formatter, then fails if it left any diff in the work tree.
      - name: Check Formatting
        run: |
          ./mfc.sh format -j "$(nproc)"
          git diff --exit-code || (echo "::error::Code is not formatted. Run './mfc.sh format' locally." && exit 1)

      - name: Spell Check
        run: ./mfc.sh spelling

      - name: Lint Toolchain
        run: ./mfc.sh lint

      # The grep-based steps below are negated with `!`: the step fails when
      # grep finds a match and passes when it finds none.

      # Searches ./src for `!$acc` / `!$omp`, skipping the excluded macro files.
      - name: Lint Source - No Raw Directives
        run: |
          ! grep -iR '!\$acc\|!\$omp' --exclude="parallel_macros.fpp" --exclude="acc_macros.fpp" --exclude="omp_macros.fpp" --exclude="shared_parallel_macros.fpp" --exclude="syscheck.fpp" ./src/*

      # Searches ./src for the listed double-precision spellings and literals.
      - name: Lint Source - No Double Precision Intrinsics
        run: |
          ! grep -iR 'double_precision\|dsqrt\|dexp\|dlog\|dble\|dabs\|double\ precision\|real(8)\|real(4)\|dprod\|dmin\|dmax\|dfloat\|dreal\|dcos\|dsin\|dtan\|dsign\|dtanh\|dsinh\|dcosh\|d0' --exclude-dir=syscheck --exclude="*nvtx*" --exclude="*precision_select*" ./src/*

      # Searches ./src for runs of `...`, `---`, or `===`.
      - name: Lint Source - No Junk Code
        run: |
          ! grep -iR -e '\.\.\.' -e '\-\-\-' -e '===' ./src/*

      - name: Lint Docs
        run: python3 toolchain/mfc/lint_docs.py
# Job: file-changes. Publishes path-filter results and a dependency-change flag.
  file-changes:
    name: Detect File Changes
    runs-on: 'ubuntu-latest'
    # Outputs consumed by downstream jobs through `needs.file-changes.outputs.*`.
    outputs:
      checkall: ${{ steps.changes.outputs.checkall }}
      cases_py: ${{ steps.changes.outputs.cases_py }}
      dep_changed: ${{ steps.dep-check.outputs.dep_changed }}
    steps:
      - name: Clone
        uses: actions/checkout@v4

      # Evaluates the filters defined in .github/file-filter.yml.
      - name: Detect Changes
        uses: dorny/paths-filter@v3
        id: changes
        with:
          filters: ".github/file-filter.yml"

      # Sets `dep_changed` to true when the diff adds or removes a Fortran
      # `use`, Fypp `#:include`, or quoted `include` line.
      - name: Check for Fortran dependency changes
        id: dep-check
        env:
          GH_TOKEN: ${{ github.token }}
          # Event data is passed through the environment rather than being
          # interpolated into the script text.
          EVENT_NAME: ${{ github.event_name }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          BEFORE_SHA: ${{ github.event.before }}
          AFTER_SHA: ${{ github.event.after }}
        run: |
          # Detect added/removed use/include statements that change the
          # Fortran dependency graph, which would make the coverage cache stale.
          if [ "$EVENT_NAME" = "pull_request" ]; then
            DIFF=$(gh pr diff "$PR_NUMBER")
          elif [ "$EVENT_NAME" = "push" ]; then
            # The checkout is shallow, so make sure both endpoints of the push
            # exist locally instead of relying on an earlier step having
            # fetched them. Failure is tolerated; the diff falls back to empty.
            git fetch --no-tags --depth=1 origin "$BEFORE_SHA" "$AFTER_SHA" 2>/dev/null || true
            DIFF=$(git diff "$BEFORE_SHA" "$AFTER_SHA" 2>/dev/null || echo "")
          else
            DIFF=""
          fi
          # `^[+-]` matches both added and removed lines. The `+++`/`---` file
          # headers cannot match because the marker must be followed by optional
          # whitespace and then `use`, `#:include`, or `include`.
          if echo "$DIFF" | \
            grep -qP '^[+-]\s*(use[\s,]+\w|#:include\s|include\s+['"'"'"])'; then
            echo "dep_changed=true" >> "$GITHUB_OUTPUT"
            echo "Fortran dependency change detected — will rebuild coverage cache."
          else
            echo "dep_changed=false" >> "$GITHUB_OUTPUT"
          fi
# Job: rebuild-cache. Regenerates the gcov coverage cache and publishes it.
  rebuild-cache:
    name: Rebuild Coverage Cache
    needs: [lint-gate, file-changes]
    # Runs only in MFlowCode/MFC. Manual dispatch always rebuilds; pull_request
    # and push rebuild only when file-changes reports cases_py or dep_changed.
    if: >-
      github.repository == 'MFlowCode/MFC' &&
      (
        github.event_name == 'workflow_dispatch' ||
        (
          (github.event_name == 'pull_request' || github.event_name == 'push') &&
          (needs.file-changes.outputs.cases_py == 'true' ||
           needs.file-changes.outputs.dep_changed == 'true')
        )
      )
    timeout-minutes: 240
    runs-on:
      group: phoenix
      labels: gt
    # Write access is needed by the "Commit Cache to Master" step's push.
    permissions:
      contents: write
    steps:
      # Pull requests check out the PR head commit; other events check out
      # github.sha. Either way the ref is a commit SHA, so HEAD is detached.
      - name: Clone
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          clean: false

      - name: Rebuild Cache via SLURM
        run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/rebuild-cache.sh cpu none

      - name: Print Logs
        if: always()
        run: cat rebuild-cache-cpu-none.out

      # Pull requests hand the cache to later jobs in this run as an artifact.
      - name: Upload Cache Artifact
        if: github.event_name == 'pull_request'
        uses: actions/upload-artifact@v4
        with:
          name: coverage-cache
          path: toolchain/mfc/test/test_coverage_cache.json.gz
          retention-days: 1

      # Pushes and manual runs commit the regenerated cache back to the branch.
      - name: Commit Cache to Master
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add toolchain/mfc/test/test_coverage_cache.json.gz
          if git diff --cached --quiet; then
            echo "Coverage cache unchanged."
          else
            git commit -m "Regenerate gcov coverage cache [skip ci]"
            # HEAD is detached (the Clone step checked out a SHA), so a bare
            # `git push` has no branch to push. Name the destination ref
            # explicitly: GITHUB_REF is the ref that triggered this run.
            git push origin "HEAD:${GITHUB_REF}"
          fi
# Job: github. Build-and-test matrix on ubuntu-latest / macos-latest runners.
  github:
    name: Github
    needs: [lint-gate, file-changes, rebuild-cache]
    # always() keeps this job eligible when rebuild-cache did not run; the
    # remaining terms require lint-gate and file-changes to have succeeded,
    # rebuild-cache not to have been cancelled, and checkall to be 'true'.
    if: >-
      always() &&
      needs.lint-gate.result == 'success' &&
      needs.file-changes.result == 'success' &&
      needs.rebuild-cache.result != 'cancelled' &&
      needs.file-changes.outputs.checkall == 'true'
    strategy:
      fail-fast: false
      matrix:
        os:
          - 'ubuntu'
          - 'macos'
        mpi:
          - 'mpi'
        precision:
          - ''
        debug:
          - 'debug'
          - 'no-debug'
        intel:
          - true
          - false
        # Drop the macos + intel combination.
        exclude:
          - os: macos
            intel: true
        # One extra configuration: ubuntu, no MPI, single precision.
        include:
          - os: ubuntu
            mpi: no-mpi
            precision: single
            debug: no-debug
            intel: false
    runs-on: ${{ matrix.os }}-latest
    steps:
      - name: Clone
        uses: actions/checkout@v4

      # Best effort: a failure here does not fail the job.
      - name: Fetch master for coverage diff
        continue-on-error: true
        run: |
          git fetch origin master:master --depth=1
          git fetch --deepen=200

      # Attempted only when rebuild-cache succeeded; failure is tolerated.
      - name: Download Coverage Cache
        if: needs.rebuild-cache.result == 'success'
        continue-on-error: true
        uses: actions/download-artifact@v4
        with:
          name: coverage-cache
          path: toolchain/mfc/test

      - name: Setup MacOS
        if: matrix.os == 'macos'
        run: |
          brew update
          brew upgrade
          brew install coreutils python fftw hdf5 gcc@15 boost open-mpi lapack
          echo "FC=gfortran-15" >> $GITHUB_ENV
          echo "BOOST_INCLUDE=/opt/homebrew/include/" >> $GITHUB_ENV

      - name: Setup Ubuntu
        if: matrix.os == 'ubuntu' && matrix.intel == false
        run: |
          sudo apt update -y
          sudo apt install -y cmake gcc g++ python3 python3-dev hdf5-tools \
            libfftw3-dev libhdf5-dev openmpi-bin libopenmpi-dev \
            libblas-dev liblapack-dev

      # Installs the oneAPI packages, sources setvars.sh, and exports the
      # resulting environment to later steps via GITHUB_ENV.
      - name: Setup Ubuntu (Intel)
        if: matrix.os == 'ubuntu' && matrix.intel == true
        run: |
          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
          sudo apt-get update
          sudo apt-get install -y intel-oneapi-compiler-fortran intel-oneapi-mpi intel-oneapi-mpi-devel
          source /opt/intel/oneapi/setvars.sh
          printenv >> $GITHUB_ENV

      - name: Set up Python 3.14
        uses: actions/setup-python@v5
        with:
          python-version: '3.14'

      # Hashes architecture, CPU model, and compiler versions into a 16-character
      # value that becomes part of the build-cache key below.
      - name: Get system info for cache key
        id: sys-info
        run: |
          {
            uname -m
            cat /proc/cpuinfo 2>/dev/null | grep 'model name' | head -1 || sysctl -n machdep.cpu.brand_string 2>/dev/null || true
            if command -v ifx &>/dev/null; then ifx --version 2>/dev/null | head -1; else ${FC:-gfortran} --version 2>/dev/null | head -1 || true; fi
            ${CC:-gcc} --version 2>/dev/null | head -1 || true
          } | (sha256sum 2>/dev/null || shasum -a 256) | cut -c1-16 > /tmp/sys-hash
          echo "sys-hash=$(cat /tmp/sys-hash)" >> "$GITHUB_OUTPUT"

      # Key = matrix configuration + system hash + hash of build inputs.
      - name: Restore Build Cache
        uses: actions/cache@v4
        with:
          path: build
          key: mfc-build-${{ matrix.os }}-${{ matrix.mpi }}-${{ matrix.debug }}-${{ matrix.precision }}-${{ matrix.intel }}-${{ steps.sys-info.outputs.sys-hash }}-${{ hashFiles('CMakeLists.txt', 'toolchain/dependencies/**', 'toolchain/cmake/**', 'src/**/*.fpp', 'src/**/*.f90') }}

      # Invokes the test command with --dry-run.
      - name: Build
        env:
          TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
          PRECISION: ${{ matrix.precision != '' && format('--{0}', matrix.precision) || '' }}
        run: |
          /bin/bash mfc.sh test -v --dry-run -j "$(nproc)" --${{ matrix.debug }} --${{ matrix.mpi }} $PRECISION $TEST_ALL

      # Runs the suite; if tests/failed_uuids.txt lists at most 5 failures they
      # are re-run once, otherwise the step fails.
      - name: Test
        env:
          TEST_ALL: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }}
          TEST_PCT: ${{ matrix.debug == 'debug' && '-% 20' || '' }}
          ONLY_CHANGES: ${{ github.event_name == 'pull_request' && '--only-changes' || '' }}
        run: |
          rm -f tests/failed_uuids.txt
          TEST_EXIT=0
          /bin/bash mfc.sh test -v --max-attempts 3 -j "$(nproc)" $ONLY_CHANGES $TEST_ALL $TEST_PCT || TEST_EXIT=$?
          # Retry only if a small number of tests failed (sporadic failures)
          if [ -s tests/failed_uuids.txt ]; then
            NUM_FAILED=$(wc -l < tests/failed_uuids.txt)
            if [ "$NUM_FAILED" -le 5 ]; then
              FAILED=$(tr '\n' ' ' < tests/failed_uuids.txt)
              echo ""
              echo "=== Retrying $NUM_FAILED failed test(s): $FAILED ==="
              echo ""
              /bin/bash mfc.sh test -v --max-attempts 3 -j "$(nproc)" --only $FAILED $TEST_ALL || exit $?
            else
              echo "Too many failures ($NUM_FAILED) to retry — likely a real issue."
              exit 1
            fi
          elif [ "$TEST_EXIT" -ne 0 ]; then
            exit $TEST_EXIT
          fi
# Job: self. Cluster test matrix on the `phoenix` runner group.
  self:
    # Display name: "<cluster_name> (<device>[-<interface>][ [<shard>]])".
    name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})"
    needs: [lint-gate, file-changes, rebuild-cache]
    # Same gating as the `github` job, plus: only in MFlowCode/MFC and never
    # for draft pull requests.
    if: >-
      always() &&
      needs.lint-gate.result == 'success' &&
      needs.file-changes.result == 'success' &&
      needs.rebuild-cache.result != 'cancelled' &&
      github.repository == 'MFlowCode/MFC' &&
      needs.file-changes.outputs.checkall == 'true' &&
      github.event.pull_request.draft != true
    continue-on-error: false
    timeout-minutes: 480
    strategy:
      matrix:
        include:
          # Phoenix (GT) — build+test combined in SLURM job
          - runner: 'gt'
            cluster: 'phoenix'
            cluster_name: 'Georgia Tech | Phoenix'
            device: 'gpu'
            interface: 'acc'
          - runner: 'gt'
            cluster: 'phoenix'
            cluster_name: 'Georgia Tech | Phoenix'
            device: 'gpu'
            interface: 'omp'
          - runner: 'gt'
            cluster: 'phoenix'
            cluster_name: 'Georgia Tech | Phoenix'
            device: 'cpu'
            interface: 'none'
          # Frontier (ORNL) — build on login node, GPU tests sharded for batch partition
          - runner: 'frontier'
            cluster: 'frontier'
            cluster_name: 'Oak Ridge | Frontier'
            device: 'gpu'
            interface: 'acc'
            shard: '1/2'
          - runner: 'frontier'
            cluster: 'frontier'
            cluster_name: 'Oak Ridge | Frontier'
            device: 'gpu'
            interface: 'acc'
            shard: '2/2'
          - runner: 'frontier'
            cluster: 'frontier'
            cluster_name: 'Oak Ridge | Frontier'
            device: 'gpu'
            interface: 'omp'
            shard: '1/2'
          - runner: 'frontier'
            cluster: 'frontier'
            cluster_name: 'Oak Ridge | Frontier'
            device: 'gpu'
            interface: 'omp'
            shard: '2/2'
          - runner: 'frontier'
            cluster: 'frontier'
            cluster_name: 'Oak Ridge | Frontier'
            device: 'cpu'
            interface: 'none'
          # Frontier AMD — build on login node, GPU tests sharded for batch partition
          - runner: 'frontier'
            cluster: 'frontier_amd'
            cluster_name: 'Oak Ridge | Frontier (AMD)'
            device: 'gpu'
            interface: 'omp'
            shard: '1/2'
          - runner: 'frontier'
            cluster: 'frontier_amd'
            cluster_name: 'Oak Ridge | Frontier (AMD)'
            device: 'gpu'
            interface: 'omp'
            shard: '2/2'
          - runner: 'frontier'
            cluster: 'frontier_amd'
            cluster_name: 'Oak Ridge | Frontier (AMD)'
            device: 'cpu'
            interface: 'none'
    runs-on:
      group: phoenix
      labels: ${{ matrix.runner }}
    env:
      # Set to a non-empty value only for the phoenix cluster entries.
      NODE_OPTIONS: ${{ matrix.cluster == 'phoenix' && '--max-old-space-size=2048' || '' }}
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          clean: false

      # Attempted only when rebuild-cache succeeded; failure is tolerated.
      - name: Download Coverage Cache
        if: needs.rebuild-cache.result == 'success'
        uses: actions/download-artifact@v4
        with:
          name: coverage-cache
          path: toolchain/mfc/test
        continue-on-error: true

      # Separate build step for every cluster except phoenix, retried up to
      # three times with `./mfc.sh clean` between attempts.
      - name: Build
        if: matrix.cluster != 'phoenix'
        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3
        with:
          max_attempts: 3
          retry_wait_seconds: 60
          timeout_minutes: 60
          command: bash .github/workflows/${{ matrix.cluster }}/build.sh ${{ matrix.device }} ${{ matrix.interface }}
          on_retry_command: ./mfc.sh clean

      - name: Test
        run: bash .github/workflows/${{ matrix.cluster }}/submit.sh .github/workflows/${{ matrix.cluster }}/test.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.shard }}

      # Builds "test-<device>-<interface>[-<i>-of-<n>]", the stem of the log
      # file read by the two steps below.
      - name: Compute Log Slug
        if: always()
        id: log
        run: |
          SLUG="test-${{ matrix.device }}-${{ matrix.interface }}"
          SHARD="${{ matrix.shard }}"
          if [ -n "$SHARD" ]; then
            SLUG="${SLUG}-$(echo "$SHARD" | sed 's|/|-of-|')"
          fi
          echo "slug=${SLUG}" >> "$GITHUB_OUTPUT"

      - name: Print Logs
        if: always()
        run: cat ${{ steps.log.outputs.slug }}.out

      # always() is required here: an `if` without a status function is
      # implicitly gated on success(), which would skip archiving exactly when
      # the Test step failed and the log is most needed.
      - name: Archive Logs
        uses: actions/upload-artifact@v4
        if: always() && matrix.cluster != 'phoenix'
        with:
          name: logs-${{ strategy.job-index }}-${{ steps.log.outputs.slug }}
          path: ${{ steps.log.outputs.slug }}.out