"""Structure-binding §12.4: redocking-RMSD validation + ligand-efficiency normalization.

Two de-biased checks of the docking baseline:

A. REDOCKING RMSD — redock each co-crystal ligand into its own structure and
   measure pose RMSD vs the crystal pose (open-babel obrms, symmetry-aware).
   <2 A = geometry validated. This is the gold-standard positive control and
   is immune to the molecular-size bias that made the cross-target affinity
   test inconclusive.

B. LIGAND EFFICIENCY — affinity / heavy-atom count, to de-bias the size
   effect in the raw scores.
"""
from __future__ import annotations

import math
import re
import subprocess
import sys
from pathlib import Path

from rdkit import Chem, RDLogger

# The repository root must be importable before the project-local import below.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from scripts.dock_positive_controls import (  # noqa: E402
    STRUCT,
    VINA,
    WORK,
    TARGETS,
    pubchem_smiles,
    prep_ligand,
    prep_receptor_and_box,
)

RDLogger.DisableLog("rdApp.*")

# target name -> (PDB id, ligand residue name in that PDB file, drug name)
XTAL_LIG = {
    "hemoglobin": ("5E83", "5L7", "voxelotor"),
    "PKR": ("8XFD", "WV2", "mitapivat"),
}


def extract_crystal_ligand(pdb: str, resname: str) -> Path:
    """Write the first instance of ligand *resname* in ``<pdb>.pdb`` to its own file.

    HETATM records are matched on the residue-name columns. Only the first
    (chain ID, residue number) pair encountered is kept, so additional copies
    of the same ligand in the structure are ignored.

    Returns the path ``WORK / "xtal_<resname>.pdb"``. When no record matches,
    the file still gets written and contains only the ``END`` terminator.
    """
    kept: list[str] = []
    chosen: tuple[str, str] | None = None
    for ln in (STRUCT / f"{pdb}.pdb").read_text().splitlines():
        if not ln.startswith("HETATM") or ln[17:20].strip() != resname:
            continue
        # Slices (rather than ln[21]) cannot raise on a truncated record.
        key = (ln[21:22], ln[22:26])
        if chosen is None:
            chosen = key
        if key == chosen:
            kept.append(ln)
    out = WORK / f"xtal_{resname}.pdb"
    out.write_text("\n".join(kept) + "\nEND\n")
    return out


def dock_pose(rec, lig, center, size) -> Path | None:
    """Dock *lig* into *rec* with Vina and return the top-ranked pose as a PDB file.

    *center* and *size* are indexed 0..2 (x, y, z) and formatted as floats for
    the Vina search box.

    Returns ``None`` when Vina produces no output file or when obabel fails to
    extract the first model from it.
    """
    pose = WORK / "redock_out.pdbqt"
    top = WORK / "redock_top.pdb"
    # These scratch names are shared by every target. Remove leftovers so a
    # failed run can never be scored against the previous target's pose.
    for stale in (pose, top):
        stale.unlink(missing_ok=True)

    vina_cmd = [
        VINA,
        "--receptor", str(rec),
        "--ligand", str(lig),
        "--center_x", f"{center[0]:.2f}",
        "--center_y", f"{center[1]:.2f}",
        "--center_z", f"{center[2]:.2f}",
        "--size_x", f"{size[0]:.1f}",
        "--size_y", f"{size[1]:.1f}",
        "--size_z", f"{size[2]:.1f}",
        "--exhaustiveness", "16",
        "--out", str(pose),
    ]
    subprocess.run(vina_cmd, capture_output=True, text=True)
    if not pose.exists():
        return None

    # first model only
    subprocess.run(
        ["obabel", str(pose), "-O", str(top), "-f", "1", "-l", "1"],
        capture_output=True,
    )
    if not top.exists() or top.stat().st_size == 0:
        return None
    return top


def _parse_obrms(stdout: str) -> float | None:
    """Return the RMSD from obrms output, or ``None`` if no finite value is found.

    The value is taken as the last whitespace-separated token of the first
    line on which that token parses as a float. Parsing with ``float`` accepts
    values printed without a decimal point or in scientific notation, and
    reading the last token avoids picking up a number embedded in a title.
    """
    for line in stdout.splitlines():
        fields = line.split()
        if not fields:
            continue
        try:
            value = float(fields[-1])
        except ValueError:
            continue
        # "inf"/"nan" parse as floats but are not a usable RMSD.
        return value if math.isfinite(value) else None
    return None


def obrms(ref: Path, test: Path) -> float | None:
    """Return the heavy-atom RMSD between *ref* and *test* as reported by obrms.

    Both inputs are first converted to hydrogen-stripped SDF files with obabel.
    Returns ``None`` if either conversion produces no file or if obrms reports
    no finite value.
    """
    # strip H to compare heavy-atom poses; obrms is automorphism-aware
    r2, t2 = WORK / "ref_noH.sdf", WORK / "test_noH.sdf"
    # Shared scratch names: clear leftovers so a failed conversion cannot be
    # silently replaced by the previous target's molecules.
    for stale in (r2, t2):
        stale.unlink(missing_ok=True)
    subprocess.run(["obabel", str(ref), "-d", "-O", str(r2)], capture_output=True)
    subprocess.run(["obabel", str(test), "-d", "-O", str(t2)], capture_output=True)
    if not (r2.exists() and t2.exists()):
        return None
    out = subprocess.run(
        ["obrms", str(r2), str(t2)], capture_output=True, text=True
    ).stdout
    return _parse_obrms(out)


def _verdict(rmsd: float | None) -> str:
    """Classify a redocking RMSD: <2 A passes, <3 A is marginal, else fail."""
    if rmsd is not None and rmsd < 2.0:
        return "PASS (<2A)"
    if rmsd is not None and rmsd < 3.0:
        return "MARGINAL (<3A)"
    return "FAIL"


def main() -> None:
    """Print the redocking-RMSD report (A) and the ligand-efficiency table (B)."""
    print("=== A. Redocking-RMSD validation ===")
    for target, (pdb, resname, drug) in XTAL_LIG.items():
        rec, center, size = prep_receptor_and_box(pdb, resname)
        smi = pubchem_smiles(drug)
        lig = prep_ligand(drug, smi)
        xtal = extract_crystal_ligand(pdb, resname)
        pose = dock_pose(rec, lig, center, size)
        rmsd = obrms(xtal, pose) if pose else None
        verdict = _verdict(rmsd)
        rstr = f"{rmsd:.2f} A" if rmsd is not None else "NA"
        print(f" {drug:12s} -> {target:11s} ({pdb}/{resname}): redock RMSD {rstr} {verdict}")

    print("\n=== B. Ligand efficiency (affinity / heavy atoms), de-biasing size ===")
    # raw affinities from the cross-dock baseline (scripts/dock_positive_controls.py)
    aff = {
        "voxelotor": {"hemoglobin": -8.1, "PKR": -9.3},
        "mitapivat": {"hemoglobin": -10.0, "PKR": -11.2},
        "decitabine": {"hemoglobin": -6.6, "PKR": -7.0},
        "hydroxyurea": {"hemoglobin": -3.9, "PKR": -3.6},
        "caffeine": {"hemoglobin": -6.1, "PKR": -6.4},
    }
    print(f" {'ligand':12s}{'heavy':>6s}" + "".join(f"{t+' LE':>14s}" for t in TARGETS))
    for lig, row in aff.items():
        smi = pubchem_smiles(lig)
        mol = Chem.MolFromSmiles(smi) if smi else None
        ha = mol.GetNumHeavyAtoms() if mol is not None else 0
        if ha == 0:
            # No usable structure: report the row as unavailable rather than
            # crashing on None or dividing by zero.
            cells = "".join(f"{'NA':>14s}" for _ in TARGETS)
            print(f" {lig:12s}{'NA':>6s}{cells}")
            continue
        cells = "".join(f"{row[t]/ha:>14.3f}" for t in TARGETS)
        print(f" {lig:12s}{ha:>6d}{cells}")


if __name__ == "__main__":
    main()