Source code for d3tales_api.D3database.info_from_smiles

import base64
import sys, os
import selfies as sf
from PIL import Image
from tqdm import tqdm
from io import BytesIO
import pubchempy as pcp
from d3tales_api.D3database.schema2class import Schema2Class

from rdkit import Chem
from rdkit.Chem.rdchem import Mol
from rdkit.Chem.rdmolops import GetFormalCharge, AddHs
from rdkit.Chem.Descriptors import NumRadicalElectrons
from rdkit.Chem.inchi import MolToInchi, MolToInchiKey
from rdkit.Chem import Draw, MolFromSmiles, MolToSmiles, AllChem
from rdkit.Chem.rdMolDescriptors import CalcMolFormula, CalcExactMolWt
sys.path.append(os.path.join(Chem.RDConfig.RDContribDir, 'SA_Score'))
import sascorer


[docs]def find_lowest_e_conf(smiles, num_conf=50): """ Find the lowest energy conformer for a molecule with RDKit :param smiles: str, SMILES string :param num_conf: float, number of conformers to serach :return: str, xyz coordinates """ rdkmol = Chem.AddHs(Chem.MolFromSmiles(smiles)) results = {} AllChem.EmbedMultipleConfs(rdkmol, numConfs=num_conf, params=AllChem.ETKDG()) results_MMFF = AllChem.MMFFOptimizeMoleculeConfs(rdkmol, maxIters=5000) for i, result in tqdm(enumerate(results_MMFF)): results[i] = result[1] best_idx = min(results, key=results.get) structure = Chem.rdmolfiles.MolToXYZBlock(rdkmol, confId=best_idx) return structure
[docs]def image_to_base64(img): """ Produce base 64 string representation of image :param img: python Image object :return: str representing the image """ output_buffer = BytesIO() img.save(output_buffer, format='PNG') byte_data = output_buffer.getvalue() base64_str = base64.b64encode(byte_data) return base64_str.decode('ascii')
[docs]def base64_to_image(base64_str, image_path=None): """ Produce python Image object from base 64 string representation of image :param base64_str: base64 string representing image :param image_path: path for image to be saved (optional) :return: python Image object """ byte_data = base64.b64decode(base64_str) image_data = BytesIO(byte_data) img = Image.open(image_data) if image_path: img.save(image_path) return img
[docs]class GenerateMolInfo: """ Generate json object for insertion from smiles string Copyright 2021, University of Kentucky :param names: list of names for molecule :param smiles: smiles string :param origin_group: which group the molecule comes from :return: mol_info class object """
[docs] def __init__(self, smiles, origin_group="", names=[], extra_info=True, database='frontend', schema_name="mol_info", ground_spin=None, ground_charge=None): self.smiles = smiles self.origin_group = origin_group self.names = names self.database = database self.schema_name = schema_name self.extra_info = extra_info self.ground_spin = ground_spin self.ground_charge = ground_charge self.mol_info_dict = self.get_mol_info()
[docs] def get_mol_info(self): """ Get molecule information :return: mol_info as dict """ # Fetch schema and build class s2c = Schema2Class(schema_name=self.schema_name, database=self.database) mol_info = s2c.MolInfo() # Generate rdkit mol and final (cleaned) smiles rdkmol = MolFromSmiles(self.smiles) clean_smile = MolToSmiles(rdkmol) rdkmol_hs = AddHs(rdkmol) AllChem.EmbedMolecule(rdkmol_hs) pcpmol = pcp.get_compounds(clean_smile, namespace="smiles")[0] # Populate class mol_info.smiles = clean_smile mol_info.selfies = sf.encoder(clean_smile) if self.origin_group: mol_info.source_group = self.origin_group mol_info.inchi = MolToInchi(rdkmol) mol_info.inchi_key = MolToInchiKey(rdkmol) mol_info.iupac_name = str(pcpmol.iupac_name) mol_info.molecular_formula = CalcMolFormula(rdkmol) mol_info.number_of_atoms = Mol.GetNumAtoms(rdkmol) mol_info.molecular_weight = CalcExactMolWt(rdkmol) mol_info.groundState_charge = self.ground_charge or GetFormalCharge(rdkmol) mol_info.groundState_spin = self.ground_spin or NumRadicalElectrons(rdkmol) + 1 # calculate spin multiplicity with Hand's rule mol_info.sa_score = round(sascorer.calculateScore(rdkmol), 2) if self.extra_info: mol_info.d2_image = image_to_base64(Draw.MolToImage(rdkmol)) mol_info.init_structure = find_lowest_e_conf(clean_smile) try: mol_info.synonyms = self.names + pcpmol.synonyms except TypeError: mol_info.synonyms = self.names return mol_info.as_dict()