Boltz docking
Setup
CLI
Obtain API key from boltz.bio
# Install the CLI
pip install boltz-lab
# Set your API key
boltz-lab config --api-key "boltzpk_live_A3L3caM_...TRUNCATED..."
# Submit without waiting, good for virtual screening
boltz-lab predict job.yaml --no-wait --name "my_custom_name"
# Wait and download locally, good for a single run
boltz-lab predict job.yaml --output ./results --name "my_custom_name"
# List all jobs
boltz-lab list
# Check status later
boltz-lab status <prediction-id>
# Download when complete
boltz-lab download <prediction-id> --output ./results
Prepare YAML
prepare_boltz
def prepare_boltz(
seq:str, # Amino acid sequence of the protein
smiles:str, # SMILES string of the ligand
fname:str, # Output filename (should end with .yaml)
):
Create a YAML file for protein-ligand affinity prediction.
from tqdm import tqdm
tqdm.pandas()
# mutate G12D from human WT seq
seq = "MTEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQRVEDAFYTLVREIRQYRLKKISKEEKTPGCVKIKKCIIM"
df.progress_apply(lambda r: prepare_boltz(seq,r.SMILES,f"kras_g12d/{r.ID}.yaml") ,axis=1)
Run in batch
run_boltz
def run_boltz(
file_list:list, # list of .yaml paths as pathlib.Path objects
api_key, # API key for Boltz-Lab
job_name:NoneType=None, # job name as it appears in Boltz
):
Run Boltz-Lab predictions for a list of YAML files.
from fastcore.all import L
# suppose the yaml files are under a single folder
file_list = L(sorted(Path('kras').glob("*.yaml")))
run_boltz(file_list,key)
Results analysis
Download the results either from the website sandbox or through the CLI.
Get the optimization score and the affinity score, then merge them with the df that contains the experimental data.
plot_scatter_spearman
def plot_scatter_spearman(
data, x, y, ax:NoneType=None
):
Plot scatter + Spearman correlation and p-value annotation.
x_vars = ['log10_Kd', 'log10_IC50', 'log10_erk_IC50']
y_vars = ['Optimization', 'Binding', 'Structure_confidence']
fig, axes = plt.subplots(len(x_vars), len(y_vars), figsize=(18, 18))
for i, x_var in enumerate(x_vars):
for j, y_var in enumerate(y_vars):
plot_scatter_spearman(df, x_var, y_var, ax=axes[i, j])
plt.tight_layout()
plt.show()