"""Structure-based track §12.4: dock known binders + negatives into Hb and PKR. Positive-control recovery test: voxelotor should dock best into hemoglobin (5E83), mitapivat best into PKR (8XFD), and unrelated drugs (caffeine, hydroxyurea) should score worse. The box is centered on each co-crystal ligand (5L7=voxelotor, WV2=mitapivat). AutoDock Vina (mac binary, Rosetta) + open-babel prep. Affinity in kcal/mol; more negative = stronger predicted binding. This is a docking baseline, not an efficacy claim (PLAN §12.6). """ from __future__ import annotations import re import subprocess import tempfile from pathlib import Path import numpy as np import requests VINA = "./tools/vina" STRUCT = Path("data/raw/structures") WORK = Path("data/processed/docking") WORK.mkdir(parents=True, exist_ok=True) TARGETS = {"hemoglobin": ("5E83", "5L7"), "PKR": ("8XFD", "WV2")} LIGANDS = ["voxelotor", "mitapivat", "decitabine", "hydroxyurea", "caffeine"] EXPECTED = {"voxelotor": "hemoglobin", "mitapivat": "PKR"} def pubchem_smiles(name: str) -> str | None: for prop in ("SMILES", "ConnectivitySMILES", "IsomericSMILES", "CanonicalSMILES"): try: d = requests.get(f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{name}/property/{prop}/JSON", timeout=30).json()["PropertyTable"]["Properties"][0] if prop in d: return d[prop] except Exception: continue return None def prep_receptor_and_box(pdb: str, lig_res: str): """Receptor pdbqt (protein only) + box center/size from one copy of the co-crystal ligand.""" atoms, lig, chosen = [], [], None for ln in (STRUCT / f"{pdb}.pdb").read_text().splitlines(): if ln.startswith("ATOM"): atoms.append(ln) elif ln.startswith("HETATM") and ln[17:20].strip() == lig_res: key = (ln[21], ln[22:26]) chosen = chosen or key if key == chosen: lig.append(ln) rec_pdb = WORK / f"{pdb}_rec.pdb" rec_pdb.write_text("\n".join(atoms) + "\nEND\n") rec_pdbqt = WORK / f"{pdb}_rec.pdbqt" subprocess.run(["obabel", str(rec_pdb), "-O", str(rec_pdbqt), "-xr", "-p", "7.4"], capture_output=True, check=True) xyz = np.array([[float(l[30:38]), float(l[38:46]), float(l[46:54])] for l in lig]) center = xyz.mean(0) size = np.clip((xyz.max(0) - xyz.min(0)) + 9.0, 18.0, 28.0) return rec_pdbqt, center, size def prep_ligand(name: str, smiles: str) -> Path | None: out = WORK / f"lig_{name}.pdbqt" r = subprocess.run(["obabel", f"-:{smiles}", "-O", str(out), "--gen3d", "-p", "7.4"], capture_output=True, text=True) return out if out.exists() and out.stat().st_size > 0 else None def dock(rec_pdbqt: Path, lig_pdbqt: Path, center, size) -> float | None: out = subprocess.run([VINA, "--receptor", str(rec_pdbqt), "--ligand", str(lig_pdbqt), "--center_x", f"{center[0]:.2f}", "--center_y", f"{center[1]:.2f}", "--center_z", f"{center[2]:.2f}", "--size_x", f"{size[0]:.1f}", "--size_y", f"{size[1]:.1f}", "--size_z", f"{size[2]:.1f}", "--exhaustiveness", "8", "--cpu", "4", "--out", str(WORK / "o.pdbqt")], capture_output=True, text=True) m = re.search(r"^\s+1\s+(-?\d+\.\d+)", out.stdout, re.M) return float(m.group(1)) if m else None def main() -> None: smis = {n: pubchem_smiles(n) for n in LIGANDS} ligs = {n: prep_ligand(n, s) for n, s in smis.items() if s} ligs = {n: p for n, p in ligs.items() if p} print(f"prepared ligands: {list(ligs)}") boxes = {t: prep_receptor_and_box(pdb, lr) for t, (pdb, lr) in TARGETS.items()} print("prepared receptors:", list(boxes)) print(f"\n{'ligand':14s}" + "".join(f"{t:>13s}" for t in TARGETS)) results = {} for lname, lpath in ligs.items(): row = {} for tname, (rec, center, size) in boxes.items(): row[tname] = dock(rec, lpath, center, size) results[lname] = row cells = "".join(f"{(f'{row[t]:.1f}' if row[t] is not None else 'NA'):>13s}" for t in TARGETS) print(f" {lname:12s}{cells}") print("\n=== positive-control recovery test (§12.4) ===") for lig, exp_target in EXPECTED.items(): if lig in results: best = min(results[lig], key=lambda t: results[lig][t] if results[lig][t] is not None else 99) ok = best == exp_target print(f" {lig:12s} best target = {best:11s} (expected {exp_target}) -> {'PASS' if ok else 'FAIL'}") if __name__ == "__main__": main()