Boltz docking

Setup

CLI

Obtain an API key from boltz.bio.

# Install the CLI
pip install boltz-lab

# Set your API key
boltz-lab config --api-key "boltzpk_live_A3L3caM_...TRUNCATED..."

# Submit without waiting, good for virtual screening
boltz-lab predict job.yaml --no-wait --name "my_custom_name"

# Wait for completion and download the results locally; good for a single run
boltz-lab predict job.yaml --output ./results --name "my_custom_name"

# List all jobs
boltz-lab list

# Check status later
boltz-lab status <prediction-id>

# Download when complete
boltz-lab download <prediction-id> --output ./results

Prepare YAML


source

prepare_boltz


def prepare_boltz(
    seq:str, # Amino acid sequence of the protein
    smiles:str, # SMILES string of the ligand
    fname:str, # Output filename (should end with .yaml)
):

Create a YAML file for protein-ligand affinity prediction.

from tqdm import tqdm
tqdm.pandas()

# G12D mutant sequence (glycine at position 12 of the human WT sequence replaced by aspartate)
seq = "MTEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQRVEDAFYTLVREIRQYRLKKISKEEKTPGCVKIKKCIIM"
df.progress_apply(lambda r: prepare_boltz(seq,r.SMILES,f"kras_g12d/{r.ID}.yaml") ,axis=1)

Run in batch


source

run_boltz


def run_boltz(
    file_list:list, # List of .yaml file paths as pathlib.Path objects
    api_key, # API key for Boltz-Lab
    job_name:NoneType=None, # Job name shown in Boltz-Lab
):

Run Boltz-Lab predictions for a list of YAML files.

from fastcore.all import L

# suppose the yaml files are under a single folder
file_list = L(sorted(Path('kras').glob("*.yaml")))

run_boltz(file_list,key)

Results analysis

Download the results either from the website sandbox or through the CLI.

Get the optimization score and affinity score, then merge them with the DataFrame (df) that contains the experimental data.


source

plot_scatter_spearman


def plot_scatter_spearman(
    data, x, y, ax:NoneType=None
):

Plot scatter + Spearman correlation and p-value annotation.

x_vars = ['log10_Kd', 'log10_IC50', 'log10_erk_IC50']
y_vars = ['Optimization', 'Binding', 'Structure_confidence']

fig, axes = plt.subplots(len(x_vars), len(y_vars), figsize=(18, 18))

for i, x_var in enumerate(x_vars):
    for j, y_var in enumerate(y_vars):
        plot_scatter_spearman(df, x_var, y_var, ax=axes[i, j])

plt.tight_layout()
plt.show()

End