Week 4: recovery test (FAIL, reported honestly) + 2-page report

Run the formal recovery test against the pre-registered criteria and write the deliverable report (PLAN §6 Week 4):

- week4_recovery_test.py: evaluate hydroxyurea/L-glutamine + 5 pre-specified negative controls vs the committed criteria
- recovery_test_report.md: methodology, FAIL result with diagnosis, top-10, lisinopril as the non-obvious candidate, limitations, v2
- known_limitations.md: L-glutamine coverage resolved, 12%-overlap driver, recovery outcome table

Outcome: FAIL on all 3 criteria (hydroxyurea top 13%, L-glutamine WTCS=0, 1/5 negative controls bottom-half). Root cause is signature/assay data limitations (lost erythroid+HbF axis, 12% landmark overlap), not the matching algorithm — reported straight per the project ethos.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-23 22:38:56 +02:00
parent fd4591949c
commit 72f1a49de6
3 changed files with 192 additions and 36 deletions
--- a/scripts/week4_recovery_test.py
+++ b/scripts/week4_recovery_test.py
@@ -0,0 +1,81 @@
+"""Week 4: formal recovery test against the pre-registered criteria (PLAN §6).
+
+Pre-registered criteria (committed in docs/recovery_test_report.md before this run):
+  - hydroxyurea in top 10% (top 30 of 300), AND
+  - L-glutamine in top 25% (top 75) OR documented unscorable due to missing LINCS signature, AND
+  - >=4 of 5 pre-specified negative controls in the bottom half.
+
+The 5 negative controls are pre-specified here by a category rule (one per category, alphabetically
+first available) so the choice does not peek at ranks. Primary ranking = raw connectivity.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pandas as pd
+
+# Ranked-candidates table read by main(). The path is relative, so it resolves against
+# the current working directory of the process running this script.
+RANKED = Path("data/results/ranked_candidates_v1.csv")
+
+# One per unrelated category, alphabetical-first — chosen without looking at ranks.
+# Maps a category label to candidate drug names listed in alphabetical order; main()
+# takes the first name of each list that is present in the ranking's drug_name index,
+# so the order of every list is significant.
+NEG_CONTROL_CATEGORIES = {
+    "antifungal": ["clotrimazole", "fluconazole", "itraconazole", "ketoconazole", "miconazole", "terbinafine"],
+    "antihistamine": ["astemizole", "cetirizine", "diphenhydramine", "fexofenadine", "loratadine"],
+    "antibiotic": ["azithromycin", "ciprofloxacin", "doxycycline", "tetracycline", "trimethoprim"],
+    "hormone": ["ethinyl-estradiol", "levonorgestrel", "medroxyprogesterone-acetate", "norethindrone"],
+    "misc": ["caffeine", "lidocaine", "loperamide", "omeprazole", "ranitidine"],
+}
+
+
+def main() -> None:
+    """Evaluate the pre-registered recovery criteria and print the verdict.
+
+    Reads the ranked candidate table from ``RANKED`` (indexed by ``drug_name``),
+    looks up the ranks of hydroxyurea, L-glutamine and one negative control per
+    category of ``NEG_CONTROL_CATEGORIES``, then prints each criterion, the
+    overall PASS/FAIL, hydroxyurea's ``blended_rank`` and the ten rows with the
+    smallest ``connectivity_score``.
+
+    A reference drug that is absent from the table is reported instead of
+    crashing: hydroxyurea then fails its criterion, while L-glutamine is treated
+    as "missing-signature unscorable", which the pre-registered criteria in the
+    module docstring accept.
+
+    Raises:
+        ValueError: if ``drug_name`` values are not unique, because a rank
+            lookup by name would then be ambiguous.
+    """
+    df = pd.read_csv(RANKED).set_index("drug_name")
+    if not df.index.is_unique:
+        dupes = df.index[df.index.duplicated()].unique().tolist()
+        raise ValueError(f"duplicate drug_name entries in {RANKED}: {dupes}")
+    n = len(df)
+    top10_cut, top25_cut, half = int(n * 0.10), int(n * 0.25), n // 2
+
+    def rk(name):
+        # Rank of a drug, or None when the drug is not in the table.
+        return int(df.loc[name, "rank"]) if name in df.index else None
+
+    hu, glut = rk("hydroxyurea"), rk("glutamine")
+
+    # Pick negative controls present in the ranking: first listed name per category.
+    negs = {}
+    missing_cats = []
+    for cat, options in NEG_CONTROL_CATEGORIES.items():
+        pick = next((d for d in options if d in df.index), None)
+        if pick is None:
+            missing_cats.append(cat)
+        else:
+            negs[pick] = (cat, rk(pick))
+
+    print("=" * 60)
+    print(f"N = {n}; top10 cut = {top10_cut}, top25 cut = {top25_cut}, bottom-half > {half}")
+
+    # Criterion 1: hydroxyurea in the top 10%. Absent from the table => not met.
+    crit_hu = hu is not None and hu <= top10_cut
+    if hu is None:
+        print("\nhydroxyurea: not in ranking  -> top-10%? False")
+    else:
+        print(f"\nhydroxyurea: rank {hu} (top {100*hu/n:.1f}%)  -> top-10%? {crit_hu}")
+
+    # Criterion 2: L-glutamine in the top 25%, OR unscorable because it has no signature.
+    if glut is None:
+        crit_glut = True
+        print("L-glutamine: not in ranking  -> treated as 'missing-signature unscorable' "
+              "(document this in the report)")
+    else:
+        crit_glut = glut <= top25_cut
+        glut_score = df.loc["glutamine", "connectivity_score"]
+        print(f"L-glutamine: rank {glut} (top {100*glut/n:.1f}%), WTCS={glut_score:.3f}  "
+              f"-> top-25%? {crit_glut}  (has signature, so NOT 'missing-signature unscorable')")
+
+    # Criterion 3: at least 4 negative controls ranked in the bottom half.
+    print("\nnegative controls (pre-specified, 1 per category):")
+    n_bottom = 0
+    for d, (cat, r) in negs.items():
+        in_bottom = r > half
+        n_bottom += in_bottom
+        print(f"  {d:18s} [{cat:13s}] rank {r:3d}  bottom-half? {in_bottom}")
+    # Report the number of controls actually evaluated; the threshold stays at the
+    # pre-registered 4, so a missing control can only make the criterion harder to meet.
+    print(f"  -> {n_bottom}/{len(negs)} in bottom half (need >=4)")
+    if missing_cats:
+        print(f"  (no control available in the ranking for: {', '.join(missing_cats)})")
+
+    crit_neg = n_bottom >= 4
+    overall = crit_hu and crit_glut and crit_neg
+    print(f"\nCRITERIA: hydroxyurea={crit_hu}, L-glutamine={crit_glut}, neg-controls={crit_neg}")
+    print(f"OVERALL (raw ranking): {'PASS' if overall else 'FAIL'}")
+
+    # secondary prior-weighted view (reported, not the primary criterion)
+    if hu is not None:
+        hu_b = int(df.loc["hydroxyurea", "blended_rank"])
+        print(f"\nsecondary (mechanistic-prior) ranking: hydroxyurea blended_rank {hu_b} "
+              f"(top {100*hu_b/n:.1f}%)")
+
+    print("\n--- TOP 10 (raw connectivity) ---")
+    top10 = df.nsmallest(10, "connectivity_score")
+    for name, r in top10.iterrows():
+        print(f"  {int(r['rank']):2d}  {name:18s} {r['connectivity_score']:+.3f}  "
+              f"[{r['inclusion_reason']}]  {str(r['known_targets'])[:45]}")
+
+
+# Script entry point: run the recovery test only when this file is executed directly,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()