Source code for mdpy.io.pdb_parser

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
file : pdb_parser.py
created time : 2021/10/03
author : Zhenyu Wei
copyright : (C)Copyright 2021-present, mdpy organization
"""

import warnings
import numpy as np
import MDAnalysis as mda
from MDAnalysis.topology.guessers import guess_atom_type
from mdpy import SPATIAL_DIM
from mdpy.error import *
from mdpy.environment import *


[docs]class PDBParser:
[docs]    def __init__(self, file_path, is_parse_all=True) -> None:
        # Initial reader and parser setting
        if not file_path.endswith(".pdb"):
            raise FileFormatError("The file should end with .pdb suffix")
        self._file_path = file_path
        self._is_parse_all = is_parse_all
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self._reader = mda.coordinates.PDB.PDBReader(self._file_path)
            self._parser = mda.topology.PDBParser.PDBParser(self._file_path).parse()
        # Parse data
        self._num_particles = self._parser.n_atoms
        self._particle_ids = list(self._parser.ids.values)
        self._particle_types = list(map(guess_atom_type, self._parser.names.values))
        self._particle_names = list(self._parser.names.values)
        self._matrix_ids = list(
            np.linspace(
                0, self._num_particles - 1, self._num_particles, dtype=NUMPY_INT
            )
        )
        molecule_ids, molecule_types = (
            self._parser.resids.values,
            self._parser.resnames.values,
        )
        self._molecule_ids, self._molecule_types = [], []
        for i in range(self._parser.n_atoms):
            resid = self._parser.tt.atoms2residues(i)
            self._molecule_ids.append(molecule_ids[resid])
            self._molecule_types.append(molecule_types[resid])
        self._chain_ids = list(self._parser.chainIDs.values)
        self._num_frames = self._reader.trajectory.n_frames
        if self._is_parse_all:
            if self._num_frames == 1:
                self._positions = self._reader.ts.positions.astype(NUMPY_FLOAT)
            else:
                self._positions = [
                    ts.positions.astype(NUMPY_FLOAT) for ts in self._reader.trajectory
                ]
                self._positions = np.stack(self._positions)
        self._pbc_matrix = self._reader.ts.triclinic_dimensions

    def get_matrix_id(self, particle_id):
        return self._particle_ids.index(particle_id)

    def get_particle_info(self, particle_id):
        matrix_id = self.get_matrix_id(particle_id)
        return {
            "particle_id": self._particle_ids[matrix_id],
            "particle_type": self._particle_types[matrix_id],
            "particle_name": self._particle_names[matrix_id],
            "molecule_id": self._molecule_ids[matrix_id],
            "molecule_type": self._molecule_types[matrix_id],
            "chain_id": self._chain_ids[matrix_id],
            "matrix_id": matrix_id,
            "position": self._positions[matrix_id, :],
        }

    def get_positions(self, *frames):
        num_target_frames = len(frames)
        if num_target_frames == 1:
            if frames[0] >= self._num_frames:
                raise ArrayDimError(
                    "%d beyond the number of frames %d stored in pdb file"
                    % (frames[0], self._num_frames)
                )
            result = (
                self._reader.trajectory[frames[0]].positions.copy().astype(NUMPY_FLOAT)
            )
        else:
            result = np.zeros([num_target_frames, self._num_particles, SPATIAL_DIM])
            for index, frame in enumerate(frames):
                if frame >= self._num_frames:
                    raise ArrayDimError(
                        "%d beyond the number of frames %d stored in pdb file"
                        % (frame, self._num_frames)
                    )
                result[index, :, :] = self._reader.trajectory[frame].positions.astype(
                    NUMPY_FLOAT
                )
        return result

    @property
    def particle_ids(self):
        return self._particle_ids

    @property
    def particle_types(self):
        return self._particle_types

    @property
    def particle_names(self):
        return self._particle_names

    @property
    def molecule_ids(self):
        return self._molecule_ids

    @property
    def molecule_types(self):
        return self._molecule_types

    @property
    def chain_ids(self):
        return self._chain_ids

    @property
    def num_frames(self):
        return self._num_frames

    @property
    def num_particles(self):
        return self._num_particles

    @property
    def positions(self) -> np.ndarray:
        if not self._is_parse_all:
            raise IOPoorDefinedError(
                "positions property is not supported as `is_parse_all==False`, calling `get_position` method"
            )
        return self._positions.copy()

    @property
    def pbc_matrix(self) -> np.ndarray:
        return self._pbc_matrix.copy()