Move json files from a folder into subfolders (folder_0, folder_1, …, folder_N).
split_nfolder(f'af_input/{project_name}')
Distributed 2 files into 4 folders.
End
Reference:
# import collections
# from collections.abc import Mapping, Sequence
#
# from absl import logging
# from alphafold3.cpp import cif_dict
# import numpy as np
# import rdkit.Chem as rd_chem
# from rdkit.Chem import AllChem as rd_all_chem
#
#
# def mol_to_ccd_cif(
#     mol: rd_chem.Mol,
#     component_id: str,
#     pdbx_smiles: str | None = None,
#     include_hydrogens: bool = True,
# ) -> cif_dict.CifDict:
#   """Creates a CCD-like mmcif data block from an rdkit Mol object.
#
#   Only a subset of associated mmcif fields is populated, but that is
#   sufficient for further usage, e.g. in featurization code.
#
#   Atom names can be specified via `atom_name` property. For atoms with
#   unspecified value of that property, the name is assigned based on element type
#   and the order in the Mol object.
#
#   If the Mol object has associated conformers, atom positions from the first of
#   them will be populated in the resulting mmcif file.
#
#   Args:
#     mol: An rdkit molecule.
#     component_id: Name of the molecule to use in the resulting mmcif. That is
#       equivalent to CCD code.
#     pdbx_smiles: If specified, the value will be used to populate
#       `_chem_comp.pdbx_smiles`.
#     include_hydrogens: Whether to include atom and bond data involving
#       hydrogens.
#
#   Returns:
#     An mmcif data block corresponding to the given rdkit molecule.
#
#   Raises:
#     UnsupportedMolBondError: When a molecule contains a bond that can't be
#       represented with mmcif.
#   """
#   mol = rd_chem.Mol(mol)
#   if include_hydrogens:
#     mol = rd_chem.AddHs(mol)
#   rd_chem.Kekulize(mol)
#
#   if mol.GetNumConformers() > 0:
#     ideal_conformer = mol.GetConformer(0).GetPositions()
#     ideal_conformer = np.vectorize(lambda x: f'{x:.3f}')(ideal_conformer)
#   else:
#     # No data will be populated in the resulting mmcif if the molecule doesn't
#     # have any conformers attached to it.
#     ideal_conformer = None
#
#   mol_cif = collections.defaultdict(list)
#   mol_cif['data_'] = [component_id]
#   mol_cif['_chem_comp.id'] = [component_id]
#   if pdbx_smiles:
#     mol_cif['_chem_comp.pdbx_smiles'] = [pdbx_smiles]
#
#   mol = assign_atom_names_from_graph(mol, keep_existing_names=True)
#
#   for atom_idx, atom in enumerate(mol.GetAtoms()):
#     element = atom.GetSymbol()
#     if not include_hydrogens and element in ('H', 'D'):
#       continue
#
#     mol_cif['_chem_comp_atom.comp_id'].append(component_id)
#     mol_cif['_chem_comp_atom.atom_id'].append(atom.GetProp('atom_name'))
#     mol_cif['_chem_comp_atom.type_symbol'].append(atom.GetSymbol().upper())
#     mol_cif['_chem_comp_atom.charge'].append(str(atom.GetFormalCharge()))
#     if ideal_conformer is not None:
#       coords = ideal_conformer[atom_idx]
#       mol_cif['_chem_comp_atom.pdbx_model_Cartn_x_ideal'].append(coords[0])
#       mol_cif['_chem_comp_atom.pdbx_model_Cartn_y_ideal'].append(coords[1])
#       mol_cif['_chem_comp_atom.pdbx_model_Cartn_z_ideal'].append(coords[2])
#
#   for bond in mol.GetBonds():
#     atom1 = bond.GetBeginAtom()
#     atom2 = bond.GetEndAtom()
#     if not include_hydrogens and (
#         atom1.GetSymbol() in ('H', 'D') or atom2.GetSymbol() in ('H', 'D')
#     ):
#       continue
#     mol_cif['_chem_comp_bond.comp_id'].append(component_id)
#     mol_cif['_chem_comp_bond.atom_id_1'].append(
#         bond.GetBeginAtom().GetProp('atom_name')
#     )
#     mol_cif['_chem_comp_bond.atom_id_2'].append(
#         bond.GetEndAtom().GetProp('atom_name')
#     )
#     try:
#       bond_type = bond.GetBondType()
#       # Older versions of RDKit did not have a DATIVE bond type. Convert it to
#       # SINGLE to match the AF3 training setup.
#       if bond_type == rd_chem.BondType.DATIVE:
#         bond_type = rd_chem.BondType.SINGLE
#       mol_cif['_chem_comp_bond.value_order'].append(
#           _RDKIT_BOND_TYPE_TO_MMCIF[bond_type]
#       )
#       mol_cif['_chem_comp_bond.pdbx_stereo_config'].append(
#           _RDKIT_BOND_STEREO_TO_MMCIF[bond.GetStereo()]
#       )
#     except KeyError as e:
#       raise UnsupportedMolBondError from e
#     mol_cif['_chem_comp_bond.pdbx_aromatic_flag'].append(
#         'Y' if bond.GetIsAromatic() else 'N'
#     )
#
#   return cif_dict.CifDict(mol_cif)