Source code for surfaxe.analysis

# Pymatgen
from pymatgen.core import Structure
from pymatgen.analysis.local_env import CrystalNN, CutOffDictNN
from pymatgen.io.vasp.outputs import Locpot

# Misc 
import math
import numpy as np
import pandas as pd
import warnings

# surfaxe
from surfaxe.generation import oxidation_states
from surfaxe.io import plot_bond_analysis, plot_electrostatic_potential, _instantiate_structure

[docs]def cart_displacements(start, end, max_disp=0.1, save_txt=True,
txt_fname='cart_displacements.txt'):
    """
    Produces a text file with all the magnitude of displacements of atoms
    in Cartesian space

    Args:
        start (`str`): Filename of initial structure file in any format 
            supported by pymatgen or pymatgen structure object.
        end (`str`): Filename of final structure file in any format supported
            by pymatgen or pymatgen structure object.
        max_disp (`float`, optional): The maximum displacement shown. Defaults 
            to 0.1 Å.
        save_txt (`bool`, optional): Save the displacements to file. Defaults to 
            ``True``.
        txt_fname (`str`, optional): Filename of the csv file. Defaults to 
            ``'cart_displacement.txt'``.

    Returns:
       None (default) or DataFrame of displacements of atoms in Cartesian space 

    """
    # Instantiate the structures 
    start_struc = _instantiate_structure(start)
    end_struc = _instantiate_structure(end)

    # Add the site labels to the structure
    els = ''.join([i for i in start_struc.formula if not i.isdigit()]).split(' ')
    el_dict = {i : 1 for i in els}
    site_labels = []

    for site in start_struc:
        symbol = site.specie.symbol
        site_labels.append((symbol,el_dict[symbol]))
        el_dict[symbol] +=1
    start_struc.add_site_property('', site_labels)

    # Convert to cartesian coordinates
    start_struc = start_struc.cart_coords
    end_struc = end_struc.cart_coords

    # Calculate the displacements
    disp_list = []
    for n, (start_coord, end_coord) in enumerate(zip(start_struc, end_struc)):
        xdisp = math.pow(start_coord[0] - end_coord[0], 2)
        ydisp = math.pow(start_coord[1] - end_coord[1], 2)
        zdisp = math.pow(start_coord[2] - end_coord[2], 2)
        d = math.sqrt(xdisp + ydisp + zdisp)
        label = site_labels[n]
        if d >= max_disp:
            disp_list.append({
                'site': n+1,
                'atom': label,
                # this makes the displacements round to the same number of 
                # decimal places as max displacement, for presentation 
                'displacement': round(d, int(format(max_disp, 'E')[-1])) 
                             })
    # Save as txt file
    df = pd.DataFrame(disp_list)

    if save_txt: 
        df.to_csv(txt_fname, header=True, index=False, sep='\t', mode='w')
    else: 
        return df

[docs]def bond_analysis(structure, bond, nn_method=CrystalNN(), ox_states=None, 
save_csv=True, csv_fname='bond_analysis.csv', save_plt=False, 
plt_fname='bond_analysis.png', **kwargs):
    """
    Parses the structure looking for bonds between atoms. Check the validity of
    the nearest neighbour method on the bulk structure before using it on slabs.

    Args:
        structure (`str`): filename of structure, takes all pymatgen-supported 
            formats, including pmg structure object
        bond (`list`): Bond to analyse e.g. ``['Y', 'O']``
        nn_method (`class`, optional): The coordination number prediction 
            algorithm used. Because the ``nn_method`` is a class, the class 
            needs to be imported from ``pymatgen.analysis.local_env`` before it 
            can be instantiated here. Defaults to ``CrystalNN()``.
        ox_states (``None``, `list` or  `dict`, optional): Add oxidation states 
            to the structure. Different types of oxidation states specified will 
            result in different pymatgen functions used. The options are: 
            
            * if supplied as ``list``: The oxidation states are added by site 
                    
                    e.g. ``[3, 2, 2, 1, -2, -2, -2, -2]``
            
            * if supplied as ``dict``: The oxidation states are added by element
                    
                    e.g. ``{'Fe': 3, 'O':-2}``
            
            * if ``None``: The oxidation states are added by guess. 
              
            Defaults to ``None``. 
        save_csv (`bool`, optional): Makes a csv file with the c coordinate of 
            the first atom and bond length. Defaults to ``True``.
        csv_fname (`str`, optional): Filename of the csv file. Defaults to 
            ``'bond_analysis.csv'``.
        save_plt (`bool`, optional): Make and save the bond analysis plot. 
            Defaults to ``False``. 
        plt_fname (`str`, optional): Filename of the plot. Defaults to 
            ``'bond_analysis.png'``. 

    Returns:
        DataFrame with the c coordinate of the first atom and bond length
    """
    struc = _instantiate_structure(structure)
    struc = oxidation_states(structure=struc, ox_states=ox_states)

    if len(bond) > 2: 
        warnings.warn('Bond with more than two elements supplied. '
        'Only the first two elements will be treated as a bond.')

    # Iterates through the structure, looking for pairs of bonded atoms. If the
    # sites match, the bond distance is calculated and passed to a dataframe
    bonds_info = []
    for n, pos in enumerate(struc):
        if pos.specie.symbol == bond[0]:
            nearest_neighbours = nn_method.get_nn_info(struc, n)
            matched_sites = []
            for d in nearest_neighbours:
                if d.get('site').specie.symbol == bond[1]:
                    matched_sites.append(d)
            bond_distances = [
                struc.get_distance(n,x['site_index']) for x in matched_sites
            ]
            bonds_info.append({
                '{}_index'.format(bond[0]): n+1,
                '{}_c_coord'.format(bond[0]): pos.c,
                '{}-{}_bond_distance'.format(bond[0],bond[1]): np.mean(bond_distances)
            })

    df = pd.DataFrame(bonds_info)
    
    # Save plot and csv, or return the DataFrame 
    if save_plt: 
        plot_bond_analysis(bond, df=df, plt_fname=plt_fname, **kwargs)
    if save_csv: 
        if not csv_fname.endswith('.csv'):
            csv_fname += '.csv'
        df.to_csv(csv_fname, header=True, index=False)
    else: 
        return df


[docs]def electrostatic_potential(locpot='./LOCPOT', lattice_vector=None,
save_csv=True, csv_fname='potential.csv', save_plt=True, 
plt_fname='potential.png', **kwargs):
    """
    Reads LOCPOT to get the planar and optionally macroscopic potential in 
    c direction. 

    Args:
        locpot (`str`, optional): The path to the LOCPOT file. Defaults to 
            ``'./LOCPOT'``
        lattice_vector (`float`, optional): The periodicity of the slab, 
            calculates macroscopic potential with that periodicity 
        save_csv (`bool`, optional): Saves to csv. Defaults to ``True``.
        csv_fname (`str`, optional): Filename of the csv file. Defaults 
            to ``'potential.csv'``.
        save_plt (`bool`, optional): Make and save the plot of electrostatic 
            potential. Defaults to ``True``. 
        plt_fname (`str`, optional): Filename of the plot. Defaults to 
            ``'potential.png'``.

    Returns:
        DataFrame
    """
    # Read potential and structure data
    lpt = Locpot.from_file(locpot)
    struc = Structure.from_file(locpot)

    # Planar potential
    planar = lpt.get_average_along_axis(2)
    df = pd.DataFrame(data=planar, columns=['planar']) 
    
    # Calculate macroscopic potential
    if lattice_vector is not None: 
        # Divide lattice parameter by no. of grid points in the direction
        resolution = struc.lattice.abc[2]/lpt.dim[2]

        # Get number of points over which the rolling average is evaluated
        points = int(lattice_vector/resolution)

        # Need extra points at the start and end of planar potential to evaluate the
        # macroscopic potential this makes use of the PBC where the end of one unit
        # cell coincides with start of the next one
        add_to_start = planar[(len(planar) - points): ]
        add_to_end = planar[0:points]
        pfm_data = np.concatenate((add_to_start,planar,add_to_end))
        pfm = pd.DataFrame(data=pfm_data, columns=['y'])

        # Macroscopic potential
        m_data = pfm.y.rolling(window=points, center=True).mean()
        macroscopic = m_data.iloc[points:(len(planar)+points)]
        macroscopic.reset_index(drop=True,inplace=True)
        df['macroscopic'] = macroscopic

    # Get gradient of the plot - this is used for convergence testing, to make 
    # sure the potential is actually flat
    df['gradient'] = np.gradient(df['planar'])

    # Plot and save the graph, save the csv or return the dataframe
    if save_plt: 
        plot_electrostatic_potential(df=df, plt_fname=plt_fname, **kwargs)
    if save_csv: 
        if not csv_fname.endswith('.csv'):
            csv_fname += '.csv'
        df.to_csv(csv_fname, header=True, index=False)
    else: 
        return df

[docs]def simple_nn(start, end=None, ox_states=None, nn_method=CrystalNN(), 
save_csv=True, csv_fname='nn_data.csv'):
    """
    Finds the nearest neighbours for simple structures. Before using on slabs
    make sure the nn_method works with the bulk structure. 
    
    The ``site_index`` in the produced DataFrame or csv file is one-indexed and 
    represents the atom index in the structure. 

    Args:
        start (`str`): Filename of structure file in any format supported by 
            pymatgen
        end (`str`, optional): Filename of structure file in any format 
            supported by pymatgen. Use if comparing initial and final structures. 
            The structures must have same constituent atoms and number of sites. 
            Defaults to ``None``. 
        ox_states (``None``, `list` or  `dict`, optional): Add oxidation states 
            to the structure. Different types of oxidation states specified will 
            result in different pymatgen functions used. The options are: 
            
            * if supplied as ``list``: The oxidation states are added by site 
                    
                    e.g. ``[3, 2, 2, 1, -2, -2, -2, -2]``
            
            * if supplied as ``dict``: The oxidation states are added by element
                    
                    e.g. ``{'Fe': 3, 'O':-2}``
            
            * if ``None``: The oxidation states are added by guess. 
              
            Defaults to ``None``. 
        nn_method (`class`, optional): The coordination number prediction 
            algorithm used. Because the ``nn_method`` is a class, the class 
            needs to be imported from pymatgen.analysis.local_env before it 
            can be instantiated here. Defaults to ``CrystalNN()``.
        save_csv (`bool`, optional): Save to a csv file. Defaults to ``True``.
        csv_fname (`str`, optional): Filename of the csv file. Defaults to 
            ``'nn_data.csv'``
    
    Returns
        None (default) or DataFrame containing coordination data 
    """
    # Instantiate start structure object
    start_struc = _instantiate_structure(start)

    # Add atom site labels to the structure
    els = ''.join([i for i in start_struc.formula if not i.isdigit()]).split(' ')
    el_dict = {i : 1 for i in els}
    site_labels = []
    for site in start_struc:
        symbol = site.specie.symbol
        site_labels.append((symbol,el_dict[symbol]))
        el_dict[symbol] +=1
    start_struc.add_site_property('', site_labels)
    
    # Add oxidation states and get bonded structure
    start_struc = oxidation_states(start_struc, ox_states)
    bonded_start = nn_method.get_bonded_structure(start_struc)

    if end: 
        end_struc = _instantiate_structure(end)
        end_struc = oxidation_states(end_struc, ox_states)
        bonded_end = nn_method.get_bonded_structure(end_struc)
    
    # Iterate through structure, evaluate the coordination number and the 
    # nearest neighbours specie for start and end structures, collects the
    # symbol and index of the site (atom) evaluated and its nearest neighbours 
    df_list = []
    for n, site in enumerate(start_struc):
        cn_start = bonded_start.get_coordination_of_site(n)
        coord_start = bonded_start.get_connected_sites(n)
        specie_list = []
        for d in coord_start: 
            spc = d.site.specie.symbol 
            specie_list.append(spc)
        specie_list.sort()
        site_nn_start = ' '.join(specie_list)
        label = site_labels[n]

        if end: 
            cn_end = bonded_end.get_coordination_of_site(n)
            coord_end = bonded_end.get_connected_sites(n)
            specie_list = []
            for d in coord_end: 
                spc = d.site.specie.symbol 
                specie_list.append(spc)
            specie_list.sort()
            site_nn_end = ' '.join(specie_list)
            df_list.append({'site': n+1, 'atom': label, 'cn_start': cn_start,
            'nn_start': site_nn_start, 'cn_end': cn_end, 'nn_end': site_nn_end})

        else: 
            df_list.append({'site_index': n+1, 'site': label,
            'cn_start': cn_start, 'nn_start': site_nn_start})

    # Make a dataframe from df_list 
    df = pd.DataFrame(df_list)

    # Save the csv file or return as dataframe 
    if save_csv: 
        if not csv_fname.endswith('.csv'):
            csv_fname += '.csv'
        df.to_csv(csv_fname, header=True, index=False)
    else:    
        return df


[docs]def complex_nn(start,  cut_off_dict, end=None, ox_states=None, 
save_csv=True, csv_fname='nn_data.csv'):
    """
    Finds the nearest neighbours for more complex structures. Uses CutOffDictNN()
    class as the nearest neighbour method. Check validity on bulk structure
    before applying to surface slabs. 

    The ``site_index`` in the produced DataFrame or csv file is one-indexed and 
    represents the atom index in the structure. 

    Args:
        start (`str`): filename of structure, takes all pymatgen-supported formats.
        cut_off_dict (`dict`): Dictionary of bond lengths. The bonds should be 
            specified with the oxidation states\n
            e.g. ``{('Bi3+', 'O2-'): 2.46, ('V5+', 'O2-'): 1.73}``
        end (`str`, optional): filename of structure to analyse, use if 
            comparing initial and final structures. The structures must have 
            same constituent atoms and number of sites. Defaults to ``None``. 
        ox_states (``None``, `list` or  `dict`, optional): Add oxidation states 
            to the structure. Different types of oxidation states specified will 
            result in different pymatgen functions used. The options are: 
            
            * if supplied as ``list``: The oxidation states are added by site 
                    
                    e.g. ``[3, 2, 2, 1, -2, -2, -2, -2]``
            
            * if supplied as ``dict``: The oxidation states are added by element
                    
                    e.g. ``{'Fe': 3, 'O':-2}``
            
            * if ``None``:  The oxidation states are added by guess.  

            Defaults to ``None`` 
        save_csv (`bool`, optional): Save to a csv file. Defaults to ``True``.
        csv_fname (`str`, optional): Filename of the csv file. Defaults to 
            ``'nn_data.csv'``
    
    Returns
        None (default) or DataFrame containing coordination data.
    """
    # Instantiate start structure object
    start_struc = Structure.from_file(start)

    # Add atom site labels to the structure
    els = ''.join([i for i in start_struc.formula if not i.isdigit()]).split(' ')
    el_dict = {i : 1 for i in els}
    site_labels = []
    for site in start_struc:
        symbol = site.specie.symbol
        site_labels.append((symbol,el_dict[symbol]))
        el_dict[symbol] +=1
    start_struc.add_site_property('', site_labels)

    # Add oxidation states 
    start_struc = oxidation_states(start_struc, ox_states=ox_states)

    # Instantiate the nearest neighbour algorithm and get bonded structure
    codnn = CutOffDictNN(cut_off_dict=cut_off_dict)
    bonded_start = codnn.get_bonded_structure(start_struc)

    # Instantiate the end structure if provided
    if end: 
        end_struc = Structure.from_file(end)
        end_struc = oxidation_states(end_struc, ox_states=ox_states)
        bonded_end = codnn.get_bonded_structure(end_struc)

    # Iterate through structure, evaluate the coordination number and the 
    # nearest neighbours specie for start and end structures, collects the
    # symbol and index of the site (atom) evaluated and its nearest neighbours 
    df_list = []
    for n, site in enumerate(start_struc):
        cn_start = bonded_start.get_coordination_of_site(n)
        coord_start = bonded_start.get_connected_sites(n)
        specie_list = []
        for d in coord_start: 
            spc = d.site.specie.symbol 
            specie_list.append(spc)
        specie_list.sort()
        site_nn_start = ' '.join(specie_list)
        label = site_labels[n]

        if end: 
            cn_end = bonded_end.get_coordination_of_site(n)
            coord_end = bonded_end.get_connected_sites(n)
            specie_list = []
            for d in coord_end: 
                spc = d.site.specie.symbol 
                specie_list.append(spc)
            specie_list.sort()
            site_nn_end = ' '.join(specie_list)
            df_list.append({'site': n+1, 'atom': label, 'cn start': cn_start,
            'nn_start': site_nn_start, 'cn_end': cn_end, 'nn_end': site_nn_end})

        else: 
            df_list.append({'site_index': n+1, 'site': label,
            'cn_start': cn_start, 'nn_start': site_nn_start})

    # Make a dataframe from df_list 
    df = pd.DataFrame(df_list)
    
    # Save the csv file or return as dataframe 
    if save_csv: 
        if not csv_fname.endswith('.csv'):
            csv_fname += '.csv'
        df.to_csv(csv_fname, header=True, index=False)
    else:    
        return df