# CI for the benchmark package: runs the pytest suite on a Python version
# matrix and publishes a separate status check for the dossier corpus tests.
name: incumbent-benchmark

# Run only when the benchmark tree or this workflow file changes.
# Pushes are limited to main; pull requests use the same path filters.
on:
  push:
    branches:
      - main
    paths:
      - "benchmark/**"
      - ".github/workflows/incumbent-benchmark.yml"
  pull_request:
    paths:
      - "benchmark/**"
      - ".github/workflows/incumbent-benchmark.yml"

# Least privilege for GITHUB_TOKEN: the steps below only check out the
# repository and run tests, so read access to repository contents is enough.
permissions:
  contents: read

jobs:
  test:
    name: corpus + harness tests (py${{ matrix.python-version }})
    runs-on: ubuntu-latest
    strategy:
      # Let every Python version finish and report, even if another one fails.
      fail-fast: false
      matrix:
        # Quoted so the versions stay strings rather than YAML floats.
        python-version:
          - "3.11"
          - "3.12"
    defaults:
      run:
        # All `run` steps execute from the benchmark/ directory.
        working-directory: benchmark
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Upgrade packaging tooling
        run: python -m pip install --upgrade pip

      # Editable install of the package located in the working directory.
      - name: Install benchmark package
        run: pip install -e .

      - name: Install test runner
        run: pip install pytest

      - name: Run test suite (dossier corpus integrity + harness)
        run: pytest tests/ -v

  # The dossier corpus is the benchmark's ground truth. This job gives the
  # corpus validation its own prominently named status check, so reviewers
  # can see at a glance that the corpus still validates, separately from the
  # general test matrix. It has no filter of its own: it runs on every push
  # or pull request that matches the workflow-level path filters above.
  corpus-guard:
    name: dossier corpus guard
    runs-on: ubuntu-latest
    defaults:
      run:
        # All `run` steps execute from the benchmark/ directory.
        working-directory: benchmark
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install benchmark package
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install pytest

      # Runs only the dossier test module rather than the whole tests/ tree.
      - name: Validate all 30 dossiers
        run: pytest tests/test_dossiers.py -v