diff --git a/gpu/modal_app.py b/gpu/modal_app.py index 231e50a..36b09af 100644 --- a/gpu/modal_app.py +++ b/gpu/modal_app.py @@ -148,14 +148,22 @@ def cache_msa(label: str, protein_seq: str, ligand_smiles: str, cofactor_ccds: l out = work / "out" subprocess.run(["boltz", "predict", str(work / "in.yaml"), "--use_msa_server", "--cache", boltz_cache, "--out_dir", str(out), "--output_format", "pdb"], check=True) - a3m = next(out.rglob("*.a3m"), None) - if a3m is None: + # Pick the largest a3m that actually looks like FASTA/a3m text (Boltz writes several files; + # some are binary and crash its INPUT parser with KeyError '\x00'). Strip null bytes too. + candidates = sorted(out.rglob("*.a3m"), key=lambda p: p.stat().st_size, reverse=True) + chosen = None + for c in candidates: + raw = c.read_bytes() + if raw[:1] == b">" or b"\n>" in raw[:512]: + chosen = raw.replace(b"\x00", b"") + break + if chosen is None: weights.commit() - raise RuntimeError("MSA generation produced no .a3m") + raise RuntimeError(f"no valid a3m; candidates={[(str(p), p.stat().st_size) for p in candidates]}") msa_dir = Path(WEIGHTS) / "msa" msa_dir.mkdir(parents=True, exist_ok=True) dest = msa_dir / f"{label}.a3m" - shutil.copy(a3m, dest) + dest.write_bytes(chosen) weights.commit() return str(dest)