Source code for mdpy.io.charmm_toppar_parser

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
file : charmm_param_parser.py
created time : 2021/10/08
author : Zhenyu Wei
copyright : (C)Copyright 2021-present, mdpy organization
"""

import itertools
import numpy as np
from mdpy.error import *
from mdpy.unit import *
from mdpy.environment import *

RMIN_TO_SIGMA_FACTOR = NUMPY_FLOAT(2 ** (-1 / 6))
USED_BLOCK_LABELS = ["ATOMS", "BONDS", "ANGLES", "DIHEDRALS", "IMPROPER", "NONBONDED"]
UNUSED_BLOCK_LABELS = ["CMAP", "NBFIX", "HBOND", "END"]
BLOCK_LABELS = USED_BLOCK_LABELS + UNUSED_BLOCK_LABELS


[docs]class CharmmTopparParser:
[docs] def __init__(self, *file_path_list) -> None: # Read input self._file_path_list = file_path_list # Set attributes self._parameters = { "atom": [], "mass": {}, "charge": {}, "bond": {}, "angle": {}, "nonbonded": {}, "dihedral": {}, "improper": {}, } # Parse file for file_path in self._file_path_list: if file_path.startswith("toppar") or file_path.endswith("str"): self.parse_toppar_file(file_path) elif file_path.startswith("par") or file_path.endswith("prm"): self.parse_par_file(file_path) elif file_path.startswith("top") or file_path.endswith("rtf"): self.parse_top_file(file_path) else: raise FileFormatError( "Keyword: top, par, or toppar do not appear in %s, unsupported by CharmmTopparParser." % file_path.split("/")[-1] )
@property def parameters(self) -> dict: return self._parameters.copy() def parse_par_file(self, file_path): """Data info: - BONDS: V(bond) = Kb(b - b0)**2; - Kb: kcal/mole/A**2 - b0: A - ANGLES: V(angle) = Ktheta(Theta - Theta0)**2; - Ktheta: kcal/mole/rad**2 - Theta0: degrees DIHEDRALS: V(dihedral) = Kchi(1 + cos(n(chi) - delta)) - Kchi: kcal/mole - n: multiplicity - delta: degrees IMPROPER: V(improper) = Kpsi(psi - psi0)**2; - Kpsi: kcal/mole/rad**2 - psi0: degrees NONBONDED: V(Lennard-Jones) = Eps,i,j[(Rmin,i,j/ri,j)**12 - 2(Rmin,i,j/ri,j)**6] - epsilon: kcal/mole, Eps,i,j = sqrt(eps,i * eps,j) - Rmin/2: A, Rmin,i,j = Rmin/2,i + Rmin/2,j """ with open(file_path, "r") as f: info = f.read().split("\n") info_dict = self._fine_par_info(info) self._parse_par_mass_block(info_dict["ATOMS"]) self._parse_par_bond_block(info_dict["BONDS"]) self._parse_par_angle_block(info_dict["ANGLES"]) self._parse_par_dihedral_block(info_dict["DIHEDRALS"]) self._parse_par_improper_block(info_dict["IMPROPER"]) self._parse_par_nonbonded_block(info_dict["NONBONDED"]) @staticmethod def _fine_par_info(info): new_info = [] start_index = 0 for cur_index, cur_info in enumerate(info): for block_label in BLOCK_LABELS: if cur_info.startswith(block_label): new_info.append(info[start_index:cur_index]) start_index = cur_index break new_info.append(info[start_index:]) new_info = [i for i in new_info if i != []] # Avoid no parameter block info_dict = {} for info in new_info: head = info[0].lstrip() for block_label in USED_BLOCK_LABELS: if head.startswith(block_label): info_dict[block_label] = info for key, val in info_dict.items(): remove_list = [i for i in val if i.lstrip().startswith("!") or i == ""] [val.remove(i) for i in remove_list] info_dict[key] = [i.strip().split("!")[0].split() for i in val][1:] return info_dict def _embed_x_element(self, pair): if not "X" in pair: return [pair] else: res = [] pair = [ [i] if i != "X" else self._parameters["atom"] for i in pair.split("-") ] pairs = itertools.product(*pair) for pair in pairs: res.append("-".join(pair)) return res def _parse_par_mass_block(self, infos): for info in infos: self._parameters["atom"].append(info[2]) self._parameters["mass"][info[2]] = ( Quantity(float(info[3]), dalton).convert_to(default_mass_unit).value ) def _parse_par_bond_block(self, infos): for info in infos: res = [ Quantity(float(info[2]), kilocalorie_permol / angstrom**2) .convert_to(default_energy_unit / default_length_unit**2) .value, Quantity(float(info[3]), angstrom) .convert_to(default_length_unit) .value, ] target_keys = self._embed_x_element("%s-%s" % (info[0], info[1])) target_keys.extend(self._embed_x_element("%s-%s" % (info[1], info[0]))) for key in target_keys: if not key in self._parameters["bond"].keys(): self._parameters["bond"][key] = res def _parse_par_angle_block(self, infos): for info in infos: if len(info) == 5: res = [ Quantity(float(info[3]), kilocalorie_permol) .convert_to(default_energy_unit) .value, np.deg2rad(Quantity(float(info[4])).value), Quantity(0, kilocalorie_permol) .convert_to(default_energy_unit) .value, Quantity(0, angstrom).convert_to(default_length_unit).value, ] target_keys = self._embed_x_element( "%s-%s-%s" % (info[0], info[1], info[2]) ) target_keys.extend( self._embed_x_element("%s-%s-%s" % (info[2], info[1], info[0])) ) for key in target_keys: if not key in self._parameters["angle"].keys(): self._parameters["angle"][key] = res elif len(info) == 7: res = [ Quantity(float(info[3]), kilocalorie_permol) .convert_to(default_energy_unit) .value, np.deg2rad(Quantity(float(info[4])).value), Quantity(float(info[5]), kilocalorie_permol) .convert_to(default_energy_unit) .value, Quantity(float(info[6]), angstrom) .convert_to(default_length_unit) .value, ] target_keys = self._embed_x_element( "%s-%s-%s" % (info[0], info[1], info[2]) ) target_keys.extend( self._embed_x_element("%s-%s-%s" % (info[2], info[1], info[0])) ) for key in target_keys: if not key in self._parameters["angle"].keys(): self._parameters["angle"][key] = res def _parse_par_dihedral_block(self, infos): x_include_pairs = [] for info in infos: if "X" in "-".join(info[:4]): x_include_pairs.append(info) else: res = [ Quantity(float(info[4]), kilocalorie_permol) .convert_to(default_energy_unit) .value, Quantity(float(info[5])).value, np.deg2rad(Quantity(float(info[6])).value), ] target_keys = [ "%s-%s-%s-%s" % (info[0], info[1], info[2], info[3]), "%s-%s-%s-%s" % (info[3], info[2], info[1], info[0]), ] for key in target_keys: # if not key in self._parameters['dihedral'].keys(): if not key in self._parameters["dihedral"].keys(): self._parameters["dihedral"][key] = [res] else: self._parameters["dihedral"][key].append(res) for info in x_include_pairs: res = [ Quantity(float(info[4]), kilocalorie_permol) .convert_to(default_energy_unit) .value, Quantity(float(info[5])).value, np.deg2rad(Quantity(float(info[6])).value), ] target_keys = self._embed_x_element( "%s-%s-%s-%s" % (info[0], info[1], info[2], info[3]) ) target_keys.extend( self._embed_x_element( "%s-%s-%s-%s" % (info[3], info[2], info[1], info[0]) ) ) for key in target_keys: if not key in self._parameters["dihedral"].keys(): self._parameters["dihedral"][key] = [res] def _parse_par_improper_block(self, infos): for info in infos: res = [ Quantity(float(info[4]), kilocalorie_permol) .convert_to(default_energy_unit) .value, np.deg2rad(Quantity(float(info[6])).value), ] target_keys = [] for target_key in itertools.permutations(info[:4]): target_keys.extend(self._embed_x_element("%s-%s-%s-%s" % (target_key))) for key in target_keys: if not key in self._parameters["improper"].keys(): self._parameters["improper"][key] = res def _parse_par_nonbonded_block(self, infos): for info in infos[1:]: if len(info) == 4: self._parameters["nonbonded"][info[0]] = [ -Quantity(float(info[2]), kilocalorie_permol) .convert_to(default_energy_unit) .value, Quantity(float(info[3]), angstrom) .convert_to(default_length_unit) .value * 2 * RMIN_TO_SIGMA_FACTOR, ] else: self._parameters["nonbonded"][info[0]] = [ -Quantity(float(info[2]), kilocalorie_permol) .convert_to(default_energy_unit) .value, Quantity(float(info[3]), angstrom) .convert_to(default_length_unit) .value * 2 * RMIN_TO_SIGMA_FACTOR, -Quantity(float(info[5]), kilocalorie_permol) .convert_to(default_energy_unit) .value, Quantity(float(info[6]), angstrom) .convert_to(default_length_unit) .value * 2 * RMIN_TO_SIGMA_FACTOR, ] def parse_top_file(self, file_path): with open(file_path, "r") as f: info = f.read().split("\n") info_dict = self._fine_top_info(info) self._parse_top_charge_block(info_dict) @staticmethod def _fine_top_info(info): new_info = [] start_index = 0 for cur_index, cur_info in enumerate(info): if cur_info.startswith("RESI") or cur_info.startswith("PRES"): new_info.append(info[start_index:cur_index]) start_index = cur_index new_info.append(info[start_index:]) new_info = new_info[1:] info_dict = {} for info in new_info: key = info[0].split()[1] remove_list = [i for i in info if not i.startswith("ATOM")] [info.remove(i) for i in remove_list] info_dict[key] = [i.strip().split("!")[0].split() for i in info] return info_dict def _parse_top_charge_block(self, info_dict): for key, val in info_dict.items(): for line in val: if key != line[2]: self._parameters["charge"]["%s-%s" % (key, line[2])] = ( Quantity(float(line[3]), elementary_charge) .convert_to(default_charge_unit) .value ) else: # group name is the same as atom name: ion self._parameters["charge"]["%s" % key] = ( Quantity(float(line[3]), elementary_charge) .convert_to(default_charge_unit) .value ) def parse_toppar_file(self, file_path): with open(file_path, "r") as f: info = f.read().split("\n") top_info_dict, par_info_dict = self._fine_toppar_info(info) # Top data self._parse_top_charge_block(top_info_dict) # Par data self._parse_par_mass_block(par_info_dict["ATOMS"]) self._parse_par_bond_block(par_info_dict["BONDS"]) self._parse_par_angle_block(par_info_dict["ANGLES"]) self._parse_par_dihedral_block(par_info_dict["DIHEDRALS"]) self._parse_par_improper_block(par_info_dict["IMPROPER"]) self._parse_par_nonbonded_block(par_info_dict["NONBONDED"]) def _fine_toppar_info(self, info): for i, j in enumerate(info): if j.startswith("END"): split_index = i break top_info, par_info = info[: split_index + 1], info[split_index:] return self._fine_top_info(top_info), self._fine_par_info(par_info)