Scaffold Reverso MVP pipeline structure

Set up the project skeleton per PLAN.md §4:
- src/ package: identifiers, disease, drugs, scoring, provenance
  with pydantic schemas and confidence-tier logic (working);
  data-pull/compute functions stubbed per their build week
- 5 starter notebooks (01-05) with PLAN-referenced steps
- tests/test_scoring.py: tier-assignment tests pass; scoring
  reference test xfail until Week 3
- docs/: recovery_test_report, data_sources, known_limitations skeletons
- pyproject.toml (requires-python >=3.11,<3.14), .gitignore, README
- data/ tree preserved via .gitkeep; raw/processed/results gitignored

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-23 20:19:38 +02:00
parent e717cf40ed
commit b731478f5d
25 changed files with 1038 additions and 4 deletions

65
tests/test_scoring.py Normal file
View File

@@ -0,0 +1,65 @@
"""Tests for the matching engine and provenance logic.
The headline test (PLAN.md §6, Week 3 task 4) verifies connectivity scoring against a known
reference within tolerance; it is marked xfail until the scorer is implemented in Week 3.
The tier-assignment tests run today — they pin the rules from PLAN.md §3 so the most
commercially important design decision can't silently drift.
"""
from __future__ import annotations
import pytest
from src.provenance import ConfidenceTier, assign_tier
class TestAssignTier:
    """Pin the confidence-tier rules from PLAN.md §3.

    Every test passes a combination of evidence flags to ``assign_tier``
    and checks which ``ConfidenceTier`` member the result compares equal to.
    """

    def test_measured_large_n_peer_reviewed_multi_source_is_tier_a(self):
        # Measured, 27 per group, peer reviewed, not single-source -> A.
        evidence = {
            "is_measured": True,
            "n_per_group": 27,
            "peer_reviewed": True,
            "single_source": False,
        }
        assert assign_tier(**evidence) == ConfidenceTier.A

    def test_inferred_is_always_tier_c(self):
        # Not measured -> C, even with n=1000, peer review and multiple sources.
        evidence = {
            "is_measured": False,
            "n_per_group": 1000,
            "peer_reviewed": True,
            "single_source": False,
        }
        assert assign_tier(**evidence) == ConfidenceTier.C

    @pytest.mark.parametrize(
        "kwargs",
        [
            # n_per_group lowered to 6.
            {
                "is_measured": True,
                "n_per_group": 6,
                "peer_reviewed": True,
                "single_source": False,
            },
            # Not peer reviewed.
            {
                "is_measured": True,
                "n_per_group": 27,
                "peer_reviewed": False,
                "single_source": False,
            },
            # Single source.
            {
                "is_measured": True,
                "n_per_group": 27,
                "peer_reviewed": True,
                "single_source": True,
            },
            # n_per_group not provided (None).
            {
                "is_measured": True,
                "n_per_group": None,
                "peer_reviewed": True,
                "single_source": False,
            },
        ],
    )
    def test_measured_but_weak_evidence_is_tier_b(self, kwargs):
        # Each case is measured but differs from the tier-A inputs in one field.
        tier = assign_tier(**kwargs)
        assert tier == ConfidenceTier.B
@pytest.mark.xfail(
    reason="Connectivity scoring implemented in Week 3 (notebook 04).",
    strict=True,
)
def test_connectivity_score_matches_reference():
    """Check the connectivity score against a CMap/cmapPy reference value.

    Placeholder for PLAN.md §6, Week 3 task 4. Once the scorer exists, swap
    the body for a known reference example: disease up/down gene sets plus a
    drug signature, compared to the expected score within tolerance.

    The xfail marker is strict, so an unexpected pass is reported as a
    failure rather than silently accepted.
    """
    # Imported inside the test so an import-time error is caught by xfail.
    from src.scoring import connectivity_score

    observed = connectivity_score(up_genes=[], down_genes=[], drug_signature=None)  # noqa
    expected = pytest.approx(0.0, abs=1e-6)
    assert observed == expected