Source code for pybec.parsers

"""
parsers.py
Module that mainly deals with extracting information from QuantumEspresso Output

Handles the primary functions
"""


import numpy as np
import os
import logging
import glob
from collections import OrderedDict

log = logging.getLogger(__name__)
#log.setLevel(logging.DEBUG)

[docs]def get_trajectory(output_file, unit_multiplier=1.0):
    """
    Retrieves the final positions from the last step of the specified output file.

    Parameters
    ----------
    output_file : str
        File path to the output file for the step from which to pull trajectory.

    Returns
    -------
    dict
        Ionic coordinates at each timestep in a dictionary where the keys are the element symbols and
        the values are the numpy coordinate array for all atoms of that element.
    """
    if not os.path.isfile(output_file):
        raise ValueError("Output File {} is not a valid file path".format(output_file))

    # strings to search for
    pos_start = 'ATOMIC_POSITIONS'
    stepline = 'Physical Quantities at step:'

    # flag to store whether we have encountered a block of positions
    pos_block = False

    # store the atomic positions
    stepnum = None
    traj = {}

    with open(output_file, 'r') as f:
        for line in f:  # iterate through the lines.  Go line by line to not over-use memory.
            # make sure system converged
            if stepline in line:
                stepnum = int(line.split(':')[1])

            # find the pos block
            elif pos_start in line:
                pos_block = True
                pos = {}  # erase earlier forces, keeping only most recent

            # store the positions
            elif pos_block:
                line = line.split()
                if not line:  # reached end of position block
                    pos_block = False
                    traj[stepnum] = pos  # store positions with step number
                else:
                    try:
                        pos[line[0]].append(line[1:])
                    except KeyError:
                        pos[line[0]] = [line[1:]]
    for key1 in traj:
        for key2 in traj[key1]:
            traj[key1][key2] = np.array(traj[key1][key2], dtype=np.float64) * unit_multiplier
    return traj

[docs]def get_dipole(output_file):
    """
    Retrieves the electric and ionic polarizations from output file.

    Parameters
    ----------
    output_file : str
        File path to the output file for the step from which to pull polarizations.

    Returns
    -------
    tuple
        header, pols. Header is a list of the header names associated with
        pols. pols is an N X 5 np.ndarray. For each of N steps, it contains The
        associated step number, timestep, electronic, ionic, and total cell dipole.
    """
    if os.path.isfile(output_file):
        with open(output_file, 'r') as f1:
            line_1 = ''
            pols = []
            ts_found = False
            for line in f1:
                if not ts_found and "MD Simulation time step" in line:
                    ts = float(line.split('=')[1])
                elif line.startswith('Elct. dipole'):
                    step = float(line_1.split()[0])
                    e_dipole = float(line.split()[3])
                    i_dipole = float(line.split()[7])
                    pols.append([step, ts, e_dipole, i_dipole, e_dipole+i_dipole])
                line_1 = line
        return ['Step', 'Dt', 'E_Dipole', 'I_Dipole', 'Tot_Dipole'], np.array(pols)
    else:
        raise ValueError('Invalid input file name')

[docs]def get_full_dipole(directory, remove_dupes=True):
    """
    Gets the trajectory of the polarization.

    Parameters
    ----------

    directory : str
        The path to a directory containing all of the output files to parse for polarization data.

    remove_dupes : bool
        If true, keep only one of each step number, the one from the last file
        it occurs in.

    Returns
    -------

    numpy.ndarray
        Nx2 array with the first column containing the time and second column containing
        the overall polarization. Duplicate timesteps are removed during processing.

    """
    files = glob.glob(os.path.join(directory, '*'))
    files.sort()
    if remove_dupes:
        pols = {}
        for file in files:
            _, pol = get_dipole(file)
            if len(pol):
                for i, p in enumerate(pol):
                    pols[pol[i, 0]] = pol[i]
        pols = OrderedDict(sorted(pols.items()))
        pols = np.concatenate([pols[step].reshape(1,-1) for step in pols], axis=0)
    else:
        pols = []
        for file in files:
            _, pol = get_dipole(file)
            pols.append(pol)
        pols = np.concatenate(pols, axis=0)
    return pols

[docs]def get_final_positions(output_file, unit_multiplier=1.0):
    """
    Retrieves the final positions from the last step of the specified output file.

    Parameters
    ----------
    output_file : str
        File path to the output file for the step from which to pull converged forces.

    Returns
    -------
    dict
        Ionic coordinates in a dictionary where the keys are the element symbols and
        the values are the numpy coordinate array for all atoms of that element.
    """
    if not os.path.isfile(output_file):
        raise ValueError("Output File {} is not a valid file path".format(output_file))

    # strings to search for
    converged_str = 'convergence achieved for system relaxation'
    pos_start = 'ATOMIC_POSITIONS'

    # store whether system is converged
    converged = False

    # flag to store whether we have encountered a block of forces
    pos_block = False

    # store the forces
    pos = {}

    with open(output_file, 'r') as f:
        for line in f:    # iterate through the lines.  Go line by line to not over-use memory.
            # make sure system converged
            if converged_str in line:
                converged = True

            # find the force block
            elif pos_start in line:
                pos_block = True
                pos = {}    # erase earlier forces, keeping only most recent

            # store the forces, keeping only the most recent forces
            elif pos_block:
                line = line.split()
                if line == []:    # reached end of force block
                    pos_block = False
                else:
                    try:
                        pos[line[0]].append(line[1:])
                    except KeyError:
                        pos[line[0]] = [line[1:]]

    for key in pos:
        pos[key] = np.array(pos[key], dtype=np.float64) * unit_multiplier
    if not converged:
        print("WARNING: Positions don't seem to have converged.")
    return pos


[docs]def get_final_cell(output_file, unit_multiplier=1.0):
    """
    Retrieves the final cell vectors from the last step of the specified output file.

    Parameters
    ----------
    output_file : str
        File path to the output file for the step from which to pull converged forces.

    Returns
    -------
    np.ndarray
        A 3x3 array of the cell vectors, as 3 row vectors [[a1,a2,a3],[b1,b2,b3],[c1,c2,c3]]

    """
    if not os.path.isfile(output_file):
        raise ValueError("Output File {} is not a valid file path".format(output_file))

    # strings to search for
    converged_str = 'convergence achieved for system relaxation'
    cell_start = 'CELL_PARAMETERS'

    # store whether system is converged
    converged = False

    # flag to store whether we have encountered a block of forces
    cell_block = False

    # store the forces
    cell = []

    with open(output_file, 'r') as f:
        for line in f:    # iterate through the lines.  Go line by line to not over-use memory.
            # make sure system converged
            if converged_str in line:
                converged = True

            # find the force block
            elif cell_start in line:
                cell_block = True
                cell = []    # erase earlier forces, keeping only most recent

            # store the forces, keeping only the most recent forces
            elif cell_block:
                line = line.split()
                if line == []:    # reached end of force block
                    cell_block = False
                else:
                    cell.append(line)

    cell = np.array(cell, dtype=np.float64) * unit_multiplier
    if not converged:
        print("WARNING: Cell doesn't seem to have converged.")
    return cell


[docs]def get_converged_forces(output_file):
    """
    Retrieves the converged forces from the last step of the specified output file.

    Parameters
    ----------
    output_file : str
        File path to the output file for the step from which to pull converged forces.

    Returns
    -------
    dict
        Ionic forces in a dictionary where the keys are the element symbols and
        the values are the numpy force array for all atoms of that element.
    """
    if not os.path.isfile(output_file):
        raise ValueError("Output File {} is not a valid file path".format(output_file))

    # strings to search for
    converged_str = 'convergence achieved for system relaxation'
    force_start = 'Forces acting on atoms (au):'

    # store whether system is converged
    converged = False

    # flag to store whether we have encountered a block of forces
    force_block = False

    # store the forces
    forces = {}

    with open(output_file, 'r') as f:
        for line in f:    # iterate through the lines.  Go line by line to not over-use memory.
            # make sure system converged
            if converged_str in line:
                converged = True

            # find the force block
            elif force_start in line:
                force_block = True
                forces = {}    # erase earlier forces, keeping only most recent

            # store the forces, keeping only the most recent forces
            elif force_block:
                line = line.split()
                if line == []:    # reached end of force block
                    force_block = False
                else:
                    try:
                        forces[line[0]].append(line[1:])
                    except KeyError:
                        forces[line[0]] = [line[1:]]

    if converged:
        for key in forces:
            forces[key] = np.array(forces[key], dtype=np.float64)
        return forces
    else:
        raise Exception("Forces don't seem to have converged.  Check step and run again.")

[docs]def get_coordinates(xyz_file, skip=2, delimiter=None):
    """
    Loads the XYZ coordinates of the specified file.

    Parameters
    ----------
    xyz_file : str
        File path to the .xyz file containing the relaxed crystal structure

    delimiter : str
        delimiter separating x, y, z coordinates in the file, to be used
        by numpy's genfromtxt method

    Returns
    -------
    dict
        The coordinates in angstroms in a dictionary where the keys
        are the element symbols and the values are the numpy
        arrays of the coordinates for all atoms of that element.
    """
    if not os.path.isfile(xyz_file):
        raise ValueError("XYZ File {} is not a valid file path".format(xyz_file))

    coords_arr = np.genfromtxt(xyz_file, skip_header=skip, delimiter=delimiter, dtype=str)

    # store the coordinates
    coords = {}

    for element in np.unique(coords_arr[:,0]):    # make each unique element a key in the coords dict
        coords[element] = coords_arr[coords_arr[:,0] == element][:,1:].astype(np.float64)

    return coords


[docs]def get_lattice(xyz_file):
    """
    Loads the lattice vectors from the specified file.

    Parameters
    ----------
    xyz_file : str
        File path to the .xyz file containing the relaxed crystal structure

    Returns
    -------
    numpy.ndarray
        The lattice vectors in angstroms in a numpy array where the rows are
        lattice vectors a, b, and c, and the columns are the unit directions
        x, y, z.
    """
    if not os.path.isfile(xyz_file):
        raise ValueError("XYZ File {} is not a valid file path".format(xyz_file))

    with open(xyz_file, 'r') as f:
        lat_list = f.readlines()[1].split()
        lattice = np.array([lat_list[0:3], lat_list[3:6], lat_list[6:9]], dtype=np.float64)

    return lattice