"""Pose-RMSD validation of the Boltz-2 HDAC2/vorinostat prediction (closes the ยง12.4 loop). Co-folding predicts the whole complex de novo, so it lands in its own frame. To compare poses: 1. superpose the predicted protein (chain A) onto the crystal 4LXZ binding chain (Ca, gemmi), 2. apply that transform to the predicted vorinostat (chain L) and the predicted Zn (chain M), 3. symmetry-corrected in-place RMSD of the transformed ligand vs the crystal SHH (spyrmsd), 4. bonus: did the catalytic Zn land near the real one? <2 A ligand RMSD = co-folding reproduces the geometry, not just the affinity ranking -- the test classical Vina failed on this exact Zn-chelation case (7.9 A). """ from __future__ import annotations import subprocess from pathlib import Path import gemmi import numpy as np from spyrmsd import io as spyio, rmsd as spyrmsd PRED = Path("data/processed/binding/HDAC2_vorinostat_pred.pdb") XTAL = Path("data/raw/structures/4LXZ.pdb") LIG_RES = "SHH" # crystal vorinostat WORK = Path("data/processed/binding") def crystal_binding_chain(st) -> gemmi.Chain: model = st[0] lig = [a.pos for ch in model for r in ch if r.name == LIG_RES for a in r] c = gemmi.Position(sum(p.x for p in lig) / len(lig), sum(p.y for p in lig) / len(lig), sum(p.z for p in lig) / len(lig)) best, bestd = None, 1e9 for ch in model: if len(ch.get_polymer()) < 20: continue d = min((a.pos.dist(c) for r in ch for a in r), default=1e9) if d < bestd: best, bestd = ch, d return best def hetatm_lines(pdb: Path, chain: str | None = None, resname: str | None = None) -> list[str]: out = [] for ln in pdb.read_text().splitlines(): if not ln.startswith(("ATOM", "HETATM")): continue if chain and ln[21] != chain: continue if resname and ln[17:20].strip() != resname: continue out.append(ln) return out def transform_and_write(lines: list[str], T: gemmi.Transform, dest: Path) -> Path: new = [] for ln in lines: p = T.apply(gemmi.Position(float(ln[30:38]), float(ln[38:46]), float(ln[46:54]))) new.append(f"{ln[:30]}{p.x:8.3f}{p.y:8.3f}{p.z:8.3f}{ln[54:]}") dest.write_text("\n".join(new) + "\nEND\n") return dest def to_sdf(pdb: Path) -> Path: sdf = pdb.with_suffix(".sdf") subprocess.run(["obabel", str(pdb), "-O", str(sdf)], capture_output=True) return sdf def main() -> None: pred_st = gemmi.read_structure(str(PRED)) xtal_st = gemmi.read_structure(str(XTAL)) pred_chain = pred_st[0]["A"] xtal_chain = crystal_binding_chain(xtal_st) sup = gemmi.calculate_superposition(xtal_chain.get_polymer(), pred_chain.get_polymer(), gemmi.PolymerType.PeptideL, gemmi.SupSelect.CaP) T = sup.transform print(f"protein superposition: Ca RMSD {sup.rmsd:.2f} A over {sup.count} residues") # predicted ligand (chain L) and Zn (chain M) -> transform into crystal frame pred_lig = transform_and_write(hetatm_lines(PRED, chain="L"), T, WORK / "pred_lig_aligned.pdb") pred_zn_lines = hetatm_lines(PRED, chain="M") # one copy only (4LXZ has 3 SHH copies); keep the first (chain, resseq) group. shh = hetatm_lines(XTAL, resname=LIG_RES) first_key = (shh[0][21], shh[0][22:26]) one_copy = [ln for ln in shh if (ln[21], ln[22:26]) == first_key] crys_lig = WORK / "xtal_lig.pdb" crys_lig.write_text("\n".join(one_copy) + "\nEND\n") # ligand pose RMSD (in-place, symmetry-corrected) ref, mol = spyio.loadmol(str(to_sdf(crys_lig))), spyio.loadmol(str(to_sdf(pred_lig))) ref.strip(); mol.strip() rmsd = float(spyrmsd.rmsdwrapper(ref, mol, symmetry=True, minimize=False)[0]) # zinc placement: transformed predicted Zn vs nearest crystal ZN verdict = "PASS (<2A)" if rmsd < 2 else ("MARGINAL (<3A)" if rmsd < 3 else "FAIL") print(f"\nvorinostat pose RMSD (co-folded vs crystal) = {rmsd:.2f} A {verdict}") print("(classical Vina on this Zn-chelation case: 7.9 A)") pred_zn = next((a.pos for ch in pred_st[0] if ch.name == "M" for r in ch for a in r), None) xtal_zn = [a.pos for ch in xtal_st[0] for r in ch if r.name == "ZN" for a in r] if pred_zn and xtal_zn: q = T.apply(pred_zn) dz = min(((q.x - z.x) ** 2 + (q.y - z.y) ** 2 + (q.z - z.z) ** 2) ** 0.5 for z in xtal_zn) print(f"catalytic Zn placement error = {dz:.2f} A " f"{'(zinc correctly placed)' if dz < 2 else ''}") if __name__ == "__main__": main()