GPU plan: make weight persistence concrete (Modal Volume cache)

Document and wire the weight-caching mechanism:

- modal.Volume is a cloud-backed filesystem that exists independently of the GPU/container; run 1 downloads the weights into /weights, and runs 2+ reuse them (no GPU time is wasted re-downloading).
- Point the downloaders at the mount via HF_HOME, TORCH_HOME, and boltz --cache; persist new files with weights.commit() and pick up updates with weights.reload().
- Volume storage costs pennies and is separate from GPU usage, so caching is nearly free.

modal_app.py cofold(): set the cache env vars to paths under /weights, and call reload()/commit() around the (stubbed) boltz call.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-24 16:48:50 +02:00
parent 08ed713cc8
commit 81d56b7a76
2 changed files with 47 additions and 6 deletions
--- a/gpu/modal_app.py
+++ b/gpu/modal_app.py
@@ -37,17 +37,37 @@ KNOWN = {
 }


-@app.function(gpu="L4", image=image, volumes={"/weights": weights}, timeout=3600)
-def cofold(protein_seq: str, ligand_smiles: str, weights_dir: str = "/weights") -> dict:
+# Mount point of the persistent Volume; per-tool caches live under it, so weights download once and later runs reuse them.
+WEIGHTS = "/weights"
+
+
+@app.function(gpu="L4", image=image, volumes={WEIGHTS: weights}, timeout=3600)
+def cofold(protein_seq: str, ligand_smiles: str) -> dict:
    """Co-fold one protein+ligand complex and return predicted affinity + pose (PDB string).

-    Runs on the GPU only for this call, then the GPU is released. TODO: replace the stub with the
-    actual Boltz-2 invocation (write the YAML/FASTA input spec, call `boltz predict
-    --use_msa_server --out_dir ... --cache /weights`, parse the predicted structure + affinity).
+    Runs on the GPU only for this call, then the GPU is released. Model weights persist on the
+    mounted Volume across runs (see HF_HOME / --cache below), so we never re-pay GPU time to
+    re-download them.
    """
+    import os
    import subprocess  # noqa: F401  (used once boltz is wired)

-    # TODO: build boltz input (protein_seq + ligand_smiles), run, parse pose+affinity.
+    # Point every weight downloader at the persistent Volume so the cache survives teardown.
+    os.environ["HF_HOME"] = f"{WEIGHTS}/hf"          # huggingface_hub cache
+    os.environ["TORCH_HOME"] = f"{WEIGHTS}/torch"    # torch.hub cache
+    boltz_cache = f"{WEIGHTS}/boltz"                  # boltz --cache target
+    os.makedirs(boltz_cache, exist_ok=True)
+
+    # See what's already cached (run 2+ finds weights here and skips the download).
+    weights.reload()
+
+    # TODO: build boltz input (protein_seq + ligand_smiles), then:
+    #   subprocess.run(["boltz", "predict", input_yaml, "--use_msa_server",
+    #                   "--cache", boltz_cache, "--out_dir", "/tmp/out"], check=True)
+    # parse predicted structure + affinity from /tmp/out.
+
+    # Persist anything newly downloaded into the cache so the NEXT run reuses it.
+    weights.commit()
    raise NotImplementedError("Wire Boltz-2 here; see docs/gpu_plan.md Phase 1.")