Source code for mldft.utils.draw_molecule

import argparse
import sys
from pathlib import Path

from rdkit import Chem
from rdkit.Chem import Draw, rdDepictor, rdDetermineBonds


def load_molecule(path: Path) -> Chem.Mol:
    """Load a molecule from a .xyz or .sdf file.

    In case of xyz files the bond orders are tried to be determined
    automatically. The file type is selected by the file suffix, compared
    case-insensitively (``.XYZ`` is treated like ``.xyz``).

    Args:
        path (Path): Path to the file

    Returns:
        Chem.Mol: RDKit molecule object

    Raises:
        ValueError: If the suffix is neither .xyz nor .sdf, or if RDKit did not
            produce a molecule from the file contents.
    """
    suffix = path.suffix.lower()
    # Reject unsupported file types before touching the file or RDKit.
    if suffix not in (".sdf", ".xyz"):
        raise ValueError(
            f"Unknown file type {path.suffix}, only .xyz and .sdf are currently implemented"
        )

    if suffix == ".sdf":
        mol = Chem.MolFromMolFile(str(path))
    else:
        xyz_block = cleanup_xyz(path)
        mol = Chem.MolFromXYZBlock(xyz_block)

    # The RDKit parsers signal a parse failure by returning None instead of
    # raising; surface that here so callers never receive None where the
    # signature promises a Chem.Mol.
    if mol is None:
        raise ValueError(f"RDKit could not parse a molecule from {path}")

    if suffix == ".xyz":
        # xyz files only carry coordinates, so bonds have to be perceived.
        rdDetermineBonds.DetermineBonds(mol)
        # NOTE(review): DetermineBonds presumably already assigns bond orders,
        # which would make this second call redundant -- confirm against the
        # RDKit docs before removing it.
        rdDetermineBonds.DetermineBondOrders(mol)
    return mol
def cleanup_via_smiles(mol: Chem.Mol) -> Chem.Mol:
    """Round-trip a molecule through a SMILES string to clean it up for drawing.

    The molecule is serialised to SMILES and a new molecule is parsed back
    from that string; the parsed molecule is what gets returned.

    Args:
        mol (Chem.Mol): RDKit molecule object

    Returns:
        Chem.Mol: Cleaned up RDKit molecule object
    """
    return Chem.MolFromSmiles(Chem.MolToSmiles(mol))
# cleanup_xyz is required in particular for the QM9 xyz files, as they contain additional
# information which rdkit cannot handle.
def cleanup_xyz(path: Path) -> str:
    """Read an xyz file and return a cleaned up version of it which rdkit can handle.

    The result consists of the atom count, an empty comment line, and one line
    per atom holding only the first four columns (element, x, y, z). The
    original comment line, extra columns and any lines after the declared
    atoms are dropped. Coordinates that use the ``*^`` exponent marker
    (e.g. ``1.5*^-6``, a notation that appears in QM9 xyz files) are rewritten
    to standard ``e`` notation.

    Args:
        path (Path): Path to the xyz file

    Returns:
        str: Cleaned up xyz block

    Raises:
        ValueError: If the file is empty, the first line is not an integer atom
            count, fewer atom lines are present than declared, or an atom line
            has fewer than four columns.
    """
    with open(path) as f:
        lines = f.readlines()

    if not lines:
        raise ValueError(f"{path} is empty, expected an xyz file")

    n_atoms = int(lines[0])
    # Line 0 is the atom count and line 1 the comment; atoms start at line 2.
    atom_lines = lines[2 : n_atoms + 2]
    if len(atom_lines) != n_atoms:
        raise ValueError(
            f"{path} declares {n_atoms} atoms but contains {len(atom_lines)} atom lines"
        )

    cleaned = [f"{n_atoms}\n\n"]
    for line in atom_lines:
        fields = line.split()
        if len(fields) < 4:
            raise ValueError(f"Malformed atom line in {path}: {line!r}")
        atom = fields[0]
        # Only the coordinates are normalised; the element symbol is kept as is.
        coords = [value.replace("*^", "e") for value in fields[1:4]]
        cleaned.append(f"{atom} {' '.join(coords)}\n")
    return "".join(cleaned)
def draw_molecule(mol: Chem.Mol, width: int, height: int) -> str:
    """Render a molecule as SVG text.

    The atom palette entries for atomic numbers 1 through 117 are all set to
    the RGB value (0, 0, 0) before the molecule is drawn.

    Args:
        mol (Chem.Mol): RDKit molecule object
        width (int): Width of the image
        height (int): Height of the image

    Returns:
        str: SVG image
    """
    canvas = Draw.rdMolDraw2D.MolDraw2DSVG(width, height)

    # One shared colour tuple for every atomic number in the palette.
    black_palette = dict.fromkeys(range(1, 118), (0, 0, 0))
    canvas.drawOptions().updateAtomPalette(black_palette)

    canvas.DrawMolecule(mol)
    canvas.FinishDrawing()
    return canvas.GetDrawingText()
def set_general_options() -> None:
    """Apply the global RDKit settings used by this module.

    At the moment this only switches on the depictor's preferCoordGen option;
    further options may be added here later.
    """
    prefer_coordgen = True
    rdDepictor.SetPreferCoordGen(prefer_coordgen)
def write_file(svg: str, path: Path) -> None:
    """Store SVG text in a file, replacing any existing content.

    Args:
        svg (str): SVG image
        path (Path): Path to the output file
    """
    with open(path, mode="w") as svg_file:
        svg_file.write(svg)
def parse_args(args: list) -> argparse.Namespace:
    """Parse command line arguments.

    Args:
        args (list): Argument strings, without the program name

    Returns:
        argparse.Namespace: Namespace with ``input`` (Path), ``output``
            (Path, or None when not given), ``width`` and ``height``
            (ints, both defaulting to 500)
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "input",
        help="Path to the input file",
        type=Path,
    )
    cli.add_argument(
        "-o",
        "--output",
        default=None,
        help="Path to the output file",
        type=Path,
    )
    cli.add_argument(
        "--width",
        default=500,
        help="Width of the output image",
        type=int,
    )
    cli.add_argument(
        "--height",
        default=500,
        help="Height of the output image",
        type=int,
    )
    namespace = cli.parse_args(args)
    return namespace
def main():
    """Command line entry point: read a molecule file and write it out as SVG.

    The input file is loaded, cleaned up via a SMILES round trip and drawn at
    the requested size. When no output path is given, the image is written to
    ``<input stem>.svg`` in the current working directory.
    """
    cli_args = parse_args(sys.argv[1:])
    set_general_options()

    molecule = cleanup_via_smiles(load_molecule(cli_args.input))
    svg_text = draw_molecule(molecule, cli_args.width, cli_args.height)

    output_path = cli_args.output
    if output_path is None:
        output_path = Path.cwd() / f"{cli_args.input.stem}.svg"
    write_file(svg_text, output_path)


if __name__ == "__main__":
    main()