from kdock.af3.json import *
from kdock.af3.docker import *
import pandas as pd
Virtual screening
Setup
Run single protein default pipeline
JSON
data = get_protein_json('proteinA','AAA','data/proteinA.json',seeds=[1,2,3])
data
{'name': 'proteinA',
'modelSeeds': [1, 2, 3],
'sequences': [{'protein': {'id': 'A', 'sequence': 'AAA'}}],
'bondedAtomPairs': [],
'dialect': 'alphafold3',
'version': 3}
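Assuming get_protein_json also writes the dict to the path given as its third argument (here data/proteinA.json), a quick round-trip check with the read_json helper used later in this section:

# assumes get_protein_json wrote the dict to 'data/proteinA.json'
assert read_json('data/proteinA.json') == data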
Docker command
Move the generated proteinA.json to the af_input/project_name folder.
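A minimal way to do the move from Python (a sketch; it assumes af_input lives under $HOME, matching the docker volume mapping below):

import shutil
from pathlib import Path

project_name = 'sdf'
dest = Path.home()/'af_input'/project_name  # mapped to /root/af_input/sdf inside the container
dest.mkdir(parents=True, exist_ok=True)
shutil.move('data/proteinA.json', str(dest/'proteinA.json'))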
project_name = 'sdf'
docker_single_full(json_path=f"af_input/{project_name}/proteinA.json",
                   output_dir=f"af_output/{project_name}")
docker run --rm \
--volume "$HOME/af_input:/root/af_input" \
--volume "$HOME/af_output/sdf:/root/af_output" \
--volume "$HOME/af_model:/root/models" \
--volume "$HOME/af_database:/root/public_databases" \
--gpus "device=0" \
sky1ove/alphafold3 \
python run_alphafold.py \
--json_path=/root/af_input/sdf/proteinA.json \
--output_dir=/root/af_output \
--model_dir=/root/models
Prepare protein-SMILES files
Read output JSON
protein_json = read_json('../af3/data/seq_only_data.json')
str(protein_json)[:1000]
'{\'dialect\': \'alphafold3\', \'version\': 2, \'name\': \'PDCD1_seq_only\', \'sequences\': [{\'protein\': {\'id\': \'A\', \'sequence\': \'LDSPDRPWNPPTFSPALLVVTEGDNATFTCSFSNTSESFVLNWYRMSPSNQTDKLAAFPEDRSQPGQDCRFRVTQLPNGRDFHMSVVRARRNDSGTYLCGAISLAPKAQIKESLRAELRVTERRAEVPTAHPSPSPRPAGQFQTLV\', \'modifications\': [], \'unpairedMsa\': ">query\\nLDSPDRPWNPPTFSPALLVVTEGDNATFTCSFSNTSESFVLNWYRMSPSNQTDKLAAFPEDRSQPGQDCRFRVTQLPNGRDFHMSVVRARRNDSGTYLCGAISLAPKAQIKESLRAELRVTERRAEVPTAHPSPSPRPAGQFQTLV\\n>UniRef90_UPI0009801507/25-167 [subseq from] Programmed cell death protein 1 n=10 Tax=Homo sapiens TaxID=9606 RepID=UPI0009801507\\nLDSPDRPWNPPTFSPALLVVTEGDNATFTCSFSNTSESFVLNWYRMSPSNQTDKLAAFPEDRSQPGQDCRFRVTQLPNGRDFHMSVVRARRNDSGTYLCGAISLAPKAQIKESLRAELRVTERRAEVPTAHPSPSPRPAGQFQ---\\n>UniRef90_A0A5F7ZCX7/24-168 [subseq from] Programmed cell death 1 n=1 Tax=Macaca mulatta TaxID=9544 RepID=A0A5F7ZCX7_MACMU\\n-ESPDRPWNPPTFSPALLLVTEGDNATFTCSFSNASESFVLNWYRMSPSNQTDKLAAFPEDRSQPGRDCRFRVTQLPNGRDFHMSVVRARRNDSGTYLCGAISLAPKAQIKESLRAELRVTERRAEVPTAHPSPSPRP'
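The returned object is a plain dict in the AF3 dialect, so fields come out with ordinary indexing:

prot = protein_json['sequences'][0]['protein']
print(prot['id'], len(prot['sequence']))   # chain id, sequence length
print(prot['unpairedMsa'].count('>'))      # number of sequences in the bundled MSA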
Single protein-SMILES pair
out = get_protein_smiles_json('smi_name','CCC',protein_json,'data/protein_smi.json')
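The resulting file pairs the protein entry with a SMILES ligand; in the AF3 input dialect a ligand defined by SMILES looks roughly like this (illustrative shape, not the exact output of get_protein_smiles_json):

{'name': 'smi_name',
 'sequences': [{'protein': {'id': 'A', 'sequence': '...'}},
               {'ligand': {'id': 'B', 'smiles': 'CCC'}}],
 'dialect': 'alphafold3',
 'version': 2}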
Multiple protein-SMILES pairs in a df
df = pd.DataFrame({'idx':['a','b'],'smi':['CCC','OCO']})
df

|   | idx | smi |
|---|-----|-----|
| 0 | a   | CCC |
| 1 | b   | OCO |
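With a real library you would build this dataframe from your ligand file instead; a sketch assuming an SDF input and RDKit (the file name ligands.sdf is hypothetical):

from rdkit import Chem

mols = [m for m in Chem.SDMolSupplier('ligands.sdf') if m is not None]  # skip unparsable records
df = pd.DataFrame({'idx': [m.GetProp('_Name') for m in mols],
                   'smi': [Chem.MolToSmiles(m) for m in mols]})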
for idx, smi in df.values:
    _ = get_protein_smiles_json(idx,smi,protein_json,f'af_input/{project_name}/{idx}.json',seeds=[1,2,3])
This will generate one JSON file per ligand in the directory.
Split the files into multiple subfolders for multi-GPU runs
split_nfolder(f'af_input/{project_name}')
Distributed 2 files into 4 folders.
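Conceptually this is a round-robin distribution of the JSON files into folder_0 … folder_{n-1}; a minimal sketch of the idea (not kdock's actual implementation):

from pathlib import Path

def split_round_robin(directory, n=4):
    root = Path(directory)
    files = sorted(f for f in root.iterdir() if f.is_file())
    for i, f in enumerate(files):
        sub = root/f'folder_{i % n}'
        sub.mkdir(exist_ok=True)
        f.rename(sub/f.name)
    print(f'Distributed {len(files)} files into {n} folders.')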
Docker
docker pull sky1ove/alphafold3
# norun_data_pipeline means skip template search, as we already did it in the first step
for i in range(4):
    docker_multi_infer(input_dir=f"af_input/{project_name}/folder_{i}",
                       output_dir=f"af_output/{project_name}",
                       gpus=i)
docker run --rm \
--volume "$HOME/af_input:/root/af_input" \
--volume "$HOME/af_output/sdf:/root/af_output" \
--volume "$HOME/af_model:/root/models" \
--volume "$HOME/af_database:/root/public_databases" \
--volume "$HOME/af_cache:/root/cache" \
--gpus "device=0" \
sky1ove/alphafold3 \
python run_alphafold.py \
--input_dir=/root/af_input/sdf/folder_0 \
--output_dir=/root/af_output \
--model_dir=/root/models \
--jax_compilation_cache_dir=/root/cache \
--norun_data_pipeline
docker run --rm \
--volume "$HOME/af_input:/root/af_input" \
--volume "$HOME/af_output/sdf:/root/af_output" \
--volume "$HOME/af_model:/root/models" \
--volume "$HOME/af_database:/root/public_databases" \
--volume "$HOME/af_cache:/root/cache" \
--gpus "device=1" \
sky1ove/alphafold3 \
python run_alphafold.py \
--input_dir=/root/af_input/sdf/folder_1 \
--output_dir=/root/af_output \
--model_dir=/root/models \
--jax_compilation_cache_dir=/root/cache \
--norun_data_pipeline
docker run --rm \
--volume "$HOME/af_input:/root/af_input" \
--volume "$HOME/af_output/sdf:/root/af_output" \
--volume "$HOME/af_model:/root/models" \
--volume "$HOME/af_database:/root/public_databases" \
--volume "$HOME/af_cache:/root/cache" \
--gpus "device=2" \
sky1ove/alphafold3 \
python run_alphafold.py \
--input_dir=/root/af_input/sdf/folder_2 \
--output_dir=/root/af_output \
--model_dir=/root/models \
--jax_compilation_cache_dir=/root/cache \
--norun_data_pipeline
docker run --rm \
--volume "$HOME/af_input:/root/af_input" \
--volume "$HOME/af_output/sdf:/root/af_output" \
--volume "$HOME/af_model:/root/models" \
--volume "$HOME/af_database:/root/public_databases" \
--volume "$HOME/af_cache:/root/cache" \
--gpus "device=3" \
sky1ove/alphafold3 \
python run_alphafold.py \
--input_dir=/root/af_input/sdf/folder_3 \
--output_dir=/root/af_output \
--model_dir=/root/models \
--jax_compilation_cache_dir=/root/cache \
--norun_data_pipeline
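The four commands are independent, one GPU each, so they can run in parallel: paste each into its own terminal, or launch them from Python. A sketch with subprocess that rebuilds the same command strings (shell=True so $HOME expands):

import subprocess

project_name = 'sdf'
procs = []
for i in range(4):
    cmd = (
        'docker run --rm '
        '--volume "$HOME/af_input:/root/af_input" '
        f'--volume "$HOME/af_output/{project_name}:/root/af_output" '
        '--volume "$HOME/af_model:/root/models" '
        '--volume "$HOME/af_database:/root/public_databases" '
        '--volume "$HOME/af_cache:/root/cache" '
        f'--gpus "device={i}" '
        'sky1ove/alphafold3 '
        'python run_alphafold.py '
        f'--input_dir=/root/af_input/{project_name}/folder_{i} '
        '--output_dir=/root/af_output '
        '--model_dir=/root/models '
        '--jax_compilation_cache_dir=/root/cache '
        '--norun_data_pipeline'
    )
    procs.append(subprocess.Popen(cmd, shell=True))

for p in procs:
    p.wait()  # block until all four runs finish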