From 98c99bc271cd728bd9d06f3391ad2c8974bbda48 Mon Sep 17 00:00:00 2001
From: "Junior B." <junior@bonto.ch>
Date: Fri, 26 Jun 2026 07:38:01 +0200
Subject: [PATCH] Validated MSA-reuse screen; bump cache_msa timeout for slow
 MSA server

The 2-drug confirmation run passes: the MSA is cached once and reused,
the a3m parses (the null-strip + FASTA-selection fix works), 2/2 drugs
were screened, and the results look sensible. The earlier 'screened 0'
failure (a3m KeyError '\x00') and the FunctionTimeout (ColabFold server
queued for more than 30 min) are both resolved, by the a3m hardening and
by raising the cache_msa timeout from 1800 s to 4200 s, respectively.

The corrected screen now works end to end: a single MSA query, reused
across all drugs, and exception-tolerant.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 gpu/modal_app.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gpu/modal_app.py b/gpu/modal_app.py
index 36b09af..6831902 100644
--- a/gpu/modal_app.py
+++ b/gpu/modal_app.py
@@ -127,7 +127,8 @@ def build_boltz_yaml(protein_seq: str, ligand_smiles: str, cofactor_ccds: list[s
 # max_containers caps parallel fan-out (cost control). The download race that corrupts the
 # checkpoint only happens on a COLD volume; once weights are cached+committed (Phase 1 did this),
 # parallel containers just reload them, so a screen can safely run ~10-wide.
-@app.function(gpu="L4", image=image, volumes={WEIGHTS: weights}, timeout=1800)
+# Long timeout (4200 s = 70 min): the public ColabFold MSA server queues; one query can take 20-40+ min.
+@app.function(gpu="L4", image=image, volumes={WEIGHTS: weights}, timeout=4200)
 def cache_msa(label: str, protein_seq: str, ligand_smiles: str, cofactor_ccds: list[str]) -> str:
     """Compute the target's MSA ONCE (via the server) and cache the a3m on the Volume.