# Reverso/tests/test_scoring.py

"""Tests for the matching engine and provenance logic.

Connectivity tests (PLAN.md §6, Week 3 task 4) pin the weighted-KS scorer against hand-built
reference profiles. The tier-assignment tests pin the rules from PLAN.md §3 so the most
commercially important design decision can't silently drift.
"""

from __future__ import annotations

import pandas as pd
import pytest

from src.provenance import ConfidenceTier, assign_tier


class TestAssignTier:
    """Pin the confidence-tier rules described in PLAN.md §3."""

    def test_measured_large_n_peer_reviewed_multi_source_is_tier_a(self):
        # Measured, n_per_group=27, peer reviewed, not single-source => tier A.
        tier = assign_tier(
            is_measured=True,
            n_per_group=27,
            peer_reviewed=True,
            single_source=False,
        )
        assert tier == ConfidenceTier.A

    def test_inferred_is_always_tier_c(self):
        # Same supporting flags as a strong case (and n_per_group=1000), but not measured.
        tier = assign_tier(
            is_measured=False,
            n_per_group=1000,
            peer_reviewed=True,
            single_source=False,
        )
        assert tier == ConfidenceTier.C

    # Each case below differs from the tier-A call above in exactly one argument.
    @pytest.mark.parametrize(
        "kwargs",
        [
            # n_per_group lowered to 6
            {"is_measured": True, "n_per_group": 6, "peer_reviewed": True, "single_source": False},
            # not peer reviewed
            {"is_measured": True, "n_per_group": 27, "peer_reviewed": False, "single_source": False},
            # single source
            {"is_measured": True, "n_per_group": 27, "peer_reviewed": True, "single_source": True},
            # n_per_group not given (None)
            {"is_measured": True, "n_per_group": None, "peer_reviewed": True, "single_source": False},
        ],
    )
    def test_measured_but_weak_evidence_is_tier_b(self, kwargs):
        tier = assign_tier(**kwargs)
        assert tier == ConfidenceTier.B


class TestConnectivityScore:
    """Reference checks for the weighted-KS connectivity score (PLAN §6 Week 3 task 4).

    The query is fixed: up-set {U1, U2}, down-set {D1, D2}. Each test builds a drug profile
    whose relationship to that query is known by construction and asserts the sign or
    ordering that the CMap convention requires.
    """

    UP = ["U1", "U2"]
    DOWN = ["D1", "D2"]

    @staticmethod
    def _profile(values: dict[str, float]) -> pd.Series:
        """Return a profile of 20 near-zero filler genes (N0..N19) overlaid with ``values``."""
        # Filler values cycle through -0.02..0.02 so large query-gene values sit at the extremes.
        filler = {}
        for i in range(20):
            filler[f"N{i}"] = 0.01 * ((i % 5) - 2)
        return pd.Series({**filler, **values})

    def test_perfect_reversal_is_strongly_negative(self):
        from src.scoring import connectivity_score

        # The drug drives the disease-up genes strongly negative and the disease-down genes
        # strongly positive: a reversal, so the connectivity must be negative.
        reversal = self._profile({"U1": -8, "U2": -7, "D1": 8, "D2": 7})
        score = connectivity_score(self.UP, self.DOWN, reversal)
        assert score < -0.4

    def test_perfect_mimic_is_strongly_positive(self):
        from src.scoring import connectivity_score

        # Mirror image of the reversal profile: the drug moves genes the same way as the query.
        mimic = self._profile({"U1": 8, "U2": 7, "D1": -8, "D2": -7})
        score = connectivity_score(self.UP, self.DOWN, mimic)
        assert score > 0.4

    def test_reversal_beats_mimic_and_null(self):
        from src.scoring import connectivity_score

        reversal_profile = self._profile({"U1": -8, "U2": -7, "D1": 8, "D2": 7})
        mimic_profile = self._profile({"U1": 8, "U2": 7, "D1": -8, "D2": -7})
        # Query genes carry only small values here, i.e. no clear direction.
        null_profile = self._profile({"U1": 0.2, "U2": -0.1, "D1": 0.1, "D2": -0.2})

        reversal_score = connectivity_score(self.UP, self.DOWN, reversal_profile)
        mimic_score = connectivity_score(self.UP, self.DOWN, mimic_profile)
        null_score = connectivity_score(self.UP, self.DOWN, null_profile)

        assert reversal_score < null_score < mimic_score
        assert abs(null_score) < abs(reversal_score)

    def test_same_sign_enrichment_returns_zero(self):
        from src.scoring import connectivity_score

        # Up-set and down-set both sit at the top of the profile, so both enrichment scores
        # share a sign; that is ambiguous and must score exactly 0 (WTCS rule).
        all_high = self._profile({"U1": 8, "U2": 7, "D1": 6, "D2": 5})
        score = connectivity_score(self.UP, self.DOWN, all_high)
        assert score == 0.0

    def test_genes_absent_from_profile_are_ignored(self):
        from src.scoring import connectivity_score

        reversal = self._profile({"U1": -8, "U2": -7, "D1": 8, "D2": 7})
        # A query gene that the profile does not contain must leave the score unchanged.
        baseline = connectivity_score(self.UP, self.DOWN, reversal)
        with_extra = connectivity_score([*self.UP, "NOT_IN_PROFILE"], self.DOWN, reversal)
        assert baseline == pytest.approx(with_extra)


class TestMechanisticPrior:
    """Checks on the value ``mechanistic_prior`` returns for lists of target names."""

    def test_counts_distinct_sickle_pathways(self):
        from src.scoring import mechanistic_prior

        # (target names, expected prior), checked in order.
        cases = [
            # ribonucleotide reductase (hydroxyurea) -> hbf_epigenetic category.
            (["Ribonucleoside-diphosphate reductase RR1"], 1.0),
            # DNMT (epigenetic) + hemoglobin -> two categories.
            (["DNA (cytosine-5)-methyltransferase 1", "Hemoglobin subunit beta"], 2.0),
            # No targets at all.
            ([], 0.0),
            # A target name expected to match no category.
            (["Some unrelated kinase"], 0.0),
        ]
        for targets, expected in cases:
            assert mechanistic_prior(targets) == expected


class TestTauCalibration:
    """tau should reward a SPECIFIC reverser and give a near-zero score to a noise drug."""

    @staticmethod
    def _matrix() -> pd.DataFrame:
        """Build the two-drug fixture used by the tau calibration test.

        Columns are 5 query-up genes (U0..U4), 5 query-down genes (D0..D4) and 40 filler
        genes (G0..G39), in that order. Every cell starts from a small background value in
        [-0.03, 0.03]. The ``specific`` row then overrides the ten query genes with large
        values; the ``noise`` row keeps the background unchanged.

        Returns:
            A DataFrame indexed by ``["specific", "noise"]`` with one column per gene.
        """
        # Local import, matching the per-test import style used throughout this file.
        import zlib

        genes = [f"U{i}" for i in range(5)] + [f"D{i}" for i in range(5)] + [f"G{i}" for i in range(40)]
        # The built-in hash() of a str is salted per interpreter process (PYTHONHASHSEED), so
        # it would produce a different fixture on every test run. CRC32 of the gene name gives
        # the same tiny pseudo-random background on every run.
        background = {g: 0.01 * ((zlib.crc32(g.encode("utf-8")) % 7) - 3) for g in genes}
        # specific reverser: query-up genes at the bottom, query-down at the top, rest ~0
        specific = dict(background)
        for i in range(5):
            specific[f"U{i}"] = -8 - i
            specific[f"D{i}"] = 8 + i
        noise = dict(background)
        return pd.DataFrame([specific, noise], index=["specific", "noise"])[genes]

    def test_specific_reverser_has_strongly_negative_tau(self):
        """The hand-built reverser must outrank the background-only drug on every metric."""
        from src.scoring import tau_calibrate

        up = [f"U{i}" for i in range(5)]
        down = [f"D{i}" for i in range(5)]
        out = tau_calibrate(up, down, self._matrix(), n_null=300, seed=0)
        # Ranked by spec_z (continuous); the specific reverser is the most negative.
        assert out.loc["specific", "spec_z"] < -2
        assert out.loc["specific", "spec_z"] < out.loc["noise", "spec_z"]
        assert out.loc["specific", "tau"] < -50  # tau also flags it (may saturate near -100)
        assert out.loc["specific", "rank"] == 1


def test_rank_drugs_orders_by_reversal():
    """A clean reverser must get rank 1 and a lower connectivity score than a clean mimic."""
    from src.scoring import rank_drugs

    up_genes = ["U1", "U2"]
    down_genes = ["D1", "D2"]
    columns = up_genes + down_genes + [f"N{i}" for i in range(10)]

    # Both drugs start from an all-zero profile and differ only on the four query genes.
    flat = dict.fromkeys(columns, 0.0)
    reverser_row = {**flat, "U1": -8, "U2": -7, "D1": 8, "D2": 7}
    mimic_row = {**flat, "U1": 8, "U2": 7, "D1": -8, "D2": -7}
    drug_matrix = pd.DataFrame([reverser_row, mimic_row], index=["reverser", "mimic"])

    result = rank_drugs(up_genes, down_genes, drug_matrix)

    assert result.loc["reverser", "rank"] == 1
    reverser_score = result.loc["reverser", "connectivity_score"]
    mimic_score = result.loc["mimic", "connectivity_score"]
    assert reverser_score < mimic_score