From 7c6cef1aef80f725b7cdb1971a94e3cdc39f37a2 Mon Sep 17 00:00:00 2001 From: "Junior B." Date: Wed, 24 Jun 2026 16:38:54 +0200 Subject: [PATCH] Production docking: prep helps (7.9->4.8A) but Vina wrong tool for sickle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit §12.4 pushed to its limit. Meeko ligand prep + in-place symmetry RMSD (spyrmsd, not obrms) on clean HDAC2/vorinostat: 7.9A -> 4.76A. Prep and metric mattered, but still FAIL. Residual cause is fundamental: vorinostat binds via hydroxamate-Zn chelation and Vina has no metal-coordination term. Real finding: sickle's druggable targets bind via non-classical chemistry classical docking handles poorly -- Hb (covalent), PKR (allosteric+cofactor), HDAC (Zn chelation). Vina is the wrong tool for this target landscape. Redirect: data-driven AF3-class co-folding (Boltz-2/Chai-1/DiffDock) handles these modes -- the indicated next tool, gated by the 24GB local memory ceiling (cloud GPU needed). The "GPU breaks all-local" §12.6 prediction is now the binding constraint of the track. Adds: scripts/dock_production.py; deps meeko, spyrmsd, gemmi. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/structure_binding_notes.md | 34 +++++++++++--- scripts/dock_production.py | 83 +++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 6 deletions(-) create mode 100644 scripts/dock_production.py diff --git a/docs/structure_binding_notes.md b/docs/structure_binding_notes.md index efd7ae2..ed43ae3 100644 --- a/docs/structure_binding_notes.md +++ b/docs/structure_binding_notes.md @@ -79,13 +79,35 @@ Likely causes (in priority order): but doesn't survive honest validation. Credible structure-based docking needs production prep tooling (Meeko/ADFR), which is the real next investment for this track. +## Step 3 — production prep helps, but classical docking is the wrong tool here (2026-06-24) + +`scripts/dock_production.py`: Meeko ligand prep (proper rotatable-bond/AD typing) + in-place +symmetry-corrected RMSD (spyrmsd, not obrms which superimposes). On the clean HDAC2/vorinostat +target (Zn kept): + +- **7.9 Å → 4.76 Å** with proper ligand prep + correct metric. Prep and metric genuinely mattered. +- But still FAIL (>2 Å). The residual is the deeper problem: **vorinostat binding is defined by its + hydroxamate chelating the catalytic Zn, and Vina has no metal-coordination term** — it cannot + score the interaction that determines the pose. + +**The real finding: sickle's druggable targets bind via non-classical chemistry that classical +docking handles poorly** — Hb/voxelotor (covalent), PKR/mitapivat (allosteric + cofactor), +HDAC/vorinostat (Zn chelation). This is the target landscape, not bad luck. AutoDock Vina is the +wrong tool for it. + +**Redirect:** the modality that DOES handle covalent/metal/induced-fit binding is **data-driven +AF3-class co-folding** (Boltz-2 / Chai-1 / DiffDock — they learn these modes from the PDB). That is +the indicated next tool for this disease — and it's gated by the **24 GB local memory ceiling** +(PLAN §12.6 pitfall 4): needs a cloud GPU or a bigger box. The "GPU breaks all-local" prediction is +now the binding constraint of the whole track. + ## Next steps -- [ ] Install **Meeko** (+ reduce / pdb2pqr) and redo receptor+ligand prep; re-run redocking RMSD. -- [ ] Fix the RMSD metric (in-place, symmetry-corrected) to rule out a measurement artifact. -- [ ] Only once redocking validates (<2 Å) are affinity scores trustworthy — then cross-dock / - screen the library and revisit ligand-efficiency / pose-based scoring. -- [ ] Later: AF3-class co-folding (Boltz-2/DiffDock via PyTorch-MPS — 24 GB ceiling) and the §12.9 - generative beacon. +- [ ] AF3-class co-folding on a GPU (Boltz-2 affinity / Chai-1 / DiffDock); redo the §12.4 + positive-control recovery test there — it should handle the metal/covalent modes Vina can't. +- [ ] (optional) Salvage one classical Vina case: PKR with FBP/Mg cofactors RETAINED, to confirm + the harness can validate on a non-metal sickle target. +- [ ] Production receptor prep (Meeko mk_prepare_receptor + protonation) if staying with Vina. +- [ ] §12.9 generative beacon — only after a validated scoring function exists. > **Hardware note:** this machine is **24 GB** unified memory (not the 96 GB PLAN §2 assumed), > which caps local AF3-class model inference. Classical docking (above) is unaffected. diff --git a/scripts/dock_production.py b/scripts/dock_production.py new file mode 100644 index 0000000..5e844c8 --- /dev/null +++ b/scripts/dock_production.py @@ -0,0 +1,83 @@ +"""Push docking to credibility: Meeko ligand prep + in-place spyrmsd, on the clean HDAC2 target. + +Isolates the two cheap suspects behind the 7.9 A redocking failure: + 1. ligand prep — replace bare obabel --gen3d with Meeko (correct rotatable bonds / AD typing) + 2. RMSD metric — replace obrms (superimposes) with spyrmsd in-place, symmetry-corrected +Receptor reused (obabel-protonated 4LXZ with catalytic Zn kept). If redock RMSD drops <2 A, the +docking pipeline is validated and the earlier failure was prep+metric, not docking itself. +""" + +from __future__ import annotations + +import subprocess +from pathlib import Path + +import numpy as np +from rdkit import Chem, RDLogger +from rdkit.Chem import AllChem +from meeko import MoleculePreparation, PDBQTWriterLegacy +from spyrmsd import io as spyio, rmsd as spyrmsd + +import sys +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +from scripts.dock_positive_controls import STRUCT, VINA, WORK, pubchem_smiles # noqa: E402 +from scripts.dock_validate import dock_pose, extract_crystal_ligand # noqa: E402 + +RDLogger.DisableLog("rdApp.*") +PDB, RES, DRUG = "4LXZ", "SHH", "vorinostat" + + +def prep_receptor_keep_zn() -> tuple[Path, np.ndarray, np.ndarray]: + atoms, lig, chosen = [], [], None + for ln in (STRUCT / f"{PDB}.pdb").read_text().splitlines(): + if ln.startswith("ATOM") or (ln.startswith("HETATM") and ln[17:20].strip() == "ZN"): + atoms.append(ln) + elif ln.startswith("HETATM") and ln[17:20].strip() == RES: + key = (ln[21], ln[22:26]); chosen = chosen or key + if key == chosen: + lig.append(ln) + recpdb = WORK / f"{PDB}_rec.pdb"; recpdb.write_text("\n".join(atoms) + "\nEND\n") + recpdbqt = WORK / f"{PDB}_rec.pdbqt" + subprocess.run(["obabel", str(recpdb), "-O", str(recpdbqt), "-xr", "-p", "7.4"], capture_output=True) + xyz = np.array([[float(l[30:38]), float(l[38:46]), float(l[46:54])] for l in lig]) + return recpdbqt, xyz.mean(0), np.clip((xyz.max(0) - xyz.min(0)) + 8, 16, 26) + + +def meeko_ligand(smiles: str) -> Path: + mol = Chem.AddHs(Chem.MolFromSmiles(smiles)) + AllChem.EmbedMolecule(mol, randomSeed=0xC0FFEE) + AllChem.MMFFOptimizeMolecule(mol) + setups = MoleculePreparation().prepare(mol) + pdbqt, ok, err = PDBQTWriterLegacy.write_string(setups[0]) + if not ok: + raise RuntimeError(f"meeko ligand prep failed: {err}") + out = WORK / f"lig_{DRUG}_meeko.pdbqt"; out.write_text(pdbqt) + return out + + +def inplace_rmsd(crystal_pdb: Path, docked_pdbqt: Path) -> float: + cref = WORK / "xtal.sdf"; cdoc = WORK / "docked.sdf" + subprocess.run(["obabel", str(crystal_pdb), "-O", str(cref)], capture_output=True) + subprocess.run(["obabel", str(docked_pdbqt), "-O", str(cdoc), "-f", "1", "-l", "1"], capture_output=True) + ref, mol = spyio.loadmol(str(cref)), spyio.loadmol(str(cdoc)) + ref.strip(); mol.strip() + r = spyrmsd.rmsdwrapper(ref, mol, symmetry=True, minimize=False) + return float(r[0] if isinstance(r, (list, tuple, np.ndarray)) else r) + + +def main() -> None: + rec, center, size = prep_receptor_keep_zn() + smi = pubchem_smiles(DRUG) + lig = meeko_ligand(smi) + print(f"meeko ligand pdbqt: {lig.name}; box center {center.round(1)} size {size.round(1)}") + pose = dock_pose(rec, lig, center, size) # writes WORK/redock_out.pdbqt + raw = WORK / "redock_out.pdbqt" + xtal = extract_crystal_ligand(PDB, RES) + rmsd = inplace_rmsd(xtal, raw) + verdict = "PASS (<2A)" if rmsd < 2 else ("MARGINAL (<3A)" if rmsd < 3 else "FAIL") + print(f"\nvorinostat -> HDAC2 redock | in-place spyrmsd RMSD = {rmsd:.2f} A {verdict}") + print("(prior obabel-ligand + obrms gave 7.9 A)") + + +if __name__ == "__main__": + main()