Analyze

Setup

Read summary confidences json


source

read_summary_json

 read_summary_json (json_path)

Read a JSON file into a dictionary.

data = read_summary_json('data/seq_only_summary_confidences.json')
data
{'ID': 'seq_only_summary_confidences',
 'chain_iptm_0': None,
 'chain_pair_iptm_0_0': 0.72,
 'chain_pair_pae_min_0_0': 0.76,
 'chain_ptm_0': 0.72,
 'fraction_disordered': 0.19,
 'has_clash': 0.0,
 'iptm': None,
 'ptm': 0.72,
 'ranking_score': 0.82}

source

get_summary_df

 get_summary_df (output_dir)

Collect the summary JSON files from the output folder into a pandas DataFrame.

out = get_summary_df('data')
out
1 summary_confidences.json files detected
ID chain_iptm_0 chain_pair_iptm_0_0 chain_pair_pae_min_0_0 chain_ptm_0 fraction_disordered has_clash iptm ptm ranking_score
0 seq_only_summary_confidences None 0.72 0.76 0.72 0.19 0.0 None 0.72 0.82

Specific to protein pairs


source

process_summary_df

 process_summary_df (df, generate_report=False)

Post-process the JSON-derived pandas DataFrame and remove redundant columns; applicable to protein pairs only.

# out2 = process_summary_df(out)

source

get_top_cases

 get_top_cases (df, n=30)

Get the top `n` cases according to the metric.

# genes = get_top_cases(out2)

source

get_3d_report

 get_3d_report (df, index_list, x='iptm', y='ptm',
                z='chain_pair_pae_min_add', save_dir='af_report')

Generate a 3D plot HTML file given the case indices and the x, y, and z column names.

# get_3d_report(out2,genes)

source

get_report

 get_report (out_dir, save_dir='af_report')

Generate a summary report based on the summary_confidences files; return the summary DataFrame and the top cases.

df_sum, top_genes = get_report('af_output/data','af_report/proteinA')

df_sum.sort_values('iptm_ptm_rnk_add').head(10)

source

copy_file

 copy_file (idx_name, source_dir, dest_dir)

Copy all model CIF files generated by AF3 to the destination folder.

from fastcore.utils import L
copy_file('proA_proB',source_dir='af_output/proA',dest_dir='af_top')
# Or 
L(top_genes).map(copy_file,source_dir='af_output/proA',dest_dir='af_top')

End