Source code for cvm.utils

#!/usr/bin/env python3
# -*- coding:utf-8 -*-

import collections
import json
import re
import tempfile
from contextlib import contextmanager
from pathlib import Path

import numpy as np
import pandas as pd
from ruamel.yaml import YAML
from scipy.stats.mstats import gmean, hmean

__all__ = [
    'UnitConvert', 'get_inp', 'mixed_atomic_weight', 'parse_input_set', 'parse_formula',
    'cvm_context', 'logspace'
]


[docs]class UnitConvert:
    # lattice constan to atomic distance
[docs]    @staticmethod
    def lc2ad(d, n=4):
        if not isinstance(d, (float, int, list, np.ndarray, pd.Series)):
            raise RuntimeError(f'parameter <d> must be a number but got `{d}``')
        return d * np.power((3 / (4 * n * np.pi)), 1 / 3)

    # atomic distance to lattice constan
[docs]    @staticmethod
    def ad2lc(d, n=4):
        if not isinstance(d, (float, int, list, np.ndarray, pd.Series)):
            raise RuntimeError(f'parameter <d> must be a number but got `{d}`')
        return d / np.power((3 / (4 * n * np.pi)), 1 / 3)

    # eV. press to Kbar
[docs]    @staticmethod
    def eV2Kbar(p):
        if not isinstance(p, (float, int, list, np.ndarray, pd.Series)):
            raise RuntimeError(f'parameter <p> must be a number but got `{d}`')
        return p * 2.9421912e13 * 1e-8 / 27.21138505

    # a.u. temperature to K
[docs]    @staticmethod
    def au2K(t):
        if not isinstance(t (float, int, list, np.ndarray, pd.Series)):
            raise RuntimeError(f'parameter <t> must be a number but got `{t}`')
        return t * 3.1577464e5

    # a.u. temperature to K
[docs]    @staticmethod
    def ry2eV(t):
        if not isinstance(t, (float, int, list, np.ndarray, pd.Series)):
            raise RuntimeError(f'parameter <t> must be a number but got `{t}`')
        return t * 13.605698066


[docs]def get_inp(path):
    # remove comment in json
    pattern = re.compile(r"(/\*)+.+?(\*/)", re.S)
    path = Path(path).expanduser().resolve()
    with open(str(path)) as f:
        _content = f.read()
        _content = pattern.sub('', _content)
    f = tempfile.TemporaryFile(mode='w+t')
    f.write(_content)
    f.seek(0)
    inp = json.load(f)
    f.close()
    return inp


[docs]def mixed_atomic_weight(formula: str, *, mean='arithmetic'):
    atomic_weight = {
        'H': 1.008,
        'He': 4.0026019999999995,
        'Li': 6.94,
        'Be': 9.0121831,
        'B': 10.81,
        'C': 12.011,
        'N': 14.007,
        'O': 15.999,
        'F': 18.99840316,
        'Ne': 20.1797,
        'Na': 22.98976928,
        'Mg': 24.305,
        'Al': 26.9815385,
        'Si': 28.085,
        'P': 30.973762,
        'S': 32.06,
        'Cl': 35.45,
        'Ar': 39.948,
        'K': 39.0983,
        'Ca': 40.078,
        'Sc': 44.955908,
        'Ti': 47.867,
        'V': 50.9415,
        'Cr': 51.9961,
        'Mn': 54.938044,
        'Fe': 55.845,
        'Co': 58.93319399999999,
        'Ni': 58.6934,
        'Cu': 63.54600000000001,
        'Zn': 65.38,
        'Ga': 69.723,
        'Ge': 72.63,
        'As': 74.921595,
        'Se': 78.971,
        'Br': 79.904,
        'Kr': 83.79799999999999,
        'Rb': 85.4678,
        'Sr': 87.62,
        'Y': 88.90584,
        'Zr': 91.22399999999999,
        'Nb': 92.90637,
        'Mo': 95.95,
        'Tc': 97.90720999999999,
        'Ru': 101.07,
        'Rh': 102.9055,
        'Pd': 106.42,
        'Ag': 107.8682,
        'Cd': 112.414,
        'In': 114.818,
        'Sn': 118.71,
        'Sb': 121.76,
        'Te': 127.6,
        'I': 126.90446999999999,
        'Xe': 131.293,
        'Cs': 132.905452,
        'Ba': 137.327,
        'La': 138.90547,
        'Ce': 140.116,
        'Pr': 140.90766000000002,
        'Nd': 144.24200000000002,
        'Pm': 144.91276000000002,
        'Sm': 150.36,
        'Eu': 151.964,
        'Gd': 157.25,
        'Tb': 158.92535,
        'Dy': 162.5,
        'Ho': 164.93033,
        'Er': 167.25900000000001,
        'Tm': 168.93421999999998,
        'Yb': 173.045,
        'Lu': 174.9668,
        'Hf': 178.49,
        'Ta': 180.94788,
        'W': 183.84,
        'Re': 186.207,
        'Os': 190.23,
        'Ir': 192.217,
        'Pt': 195.084,
        'Au': 196.966569,
        'Hg': 200.592,
        'Tl': 204.38,
        'Pb': 207.2,
        'Bi': 208.9804,
        'Po': 209.0,
        'At': 210.0,
        'Rn': 222.0,
        'Fr': 223.0,
        'Ra': 226.0,
        'Ac': 227.0,
        'Th': 232.0377,
        'Pa': 231.03588,
        'U': 238.02891,
        'Np': 237.0,
        'Pu': 244.0,
        'Am': 243.0,
        'Cm': 247.0,
        'Bk': 247.0,
        'Cf': 251.0,
        'Es': 252.0,
        'Fm': 257.0,
        'Md': 258.0,
        'No': 259.0,
        'Lr': 262.0,
        'Rf': 267.0,
        'Db': 268.0,
        'Sg': 271.0,
        'Bh': 274.0,
        'Hs': 269.0,
        'Mt': 276.0,
        'Ds': 281.0,
        'Rg': 281.0,
        'Cn': 285.0,
        'Nh': 286.0,
        'Fl': 289.0,
        'Mc': 288.0,
        'Lv': 293.0,
        'Ts': 294.0,
        'Og': 294.0
    }

    weights = []
    num = 0
    for k, v in parse_formula(formula).items():
        weights += [atomic_weight[k]] * int(v)
        num += int(v)

    if mean == 'arithmetic':
        return np.mean(weights), num

    if mean == 'harmonic':
        return hmean(weights), num

    if mean == 'geometric':
        return gmean(weights), num

    raise ValueError("mean can be 'arithmetic', 'harmonic', and 'geometric' but got %s" % mean)


[docs]def parse_input_set(path_of_set):
    path = Path(path_of_set).expanduser().resolve()
    if not path.is_dir() or not (path / 'input.yml').exists():
        raise RuntimeError('can not parse input set')
    yaml = YAML()
    with open(str(path / 'input.yml'), 'r') as f:
        inp = yaml.load(f)

    if 'meta' not in inp:
        raise RuntimeError('can not find an entry named meta')

    if 'experiment' in inp:
        inp['experiment'] = pd.DataFrame(inp['experiment'])

    if 'series' in inp:
        for s in inp['series']:
            s['lattice'] = s['lattice'] if 'lattice' in s else 'lattice'
            s['is_ry_unit'] = s['is_ry_unit'] if 'is_ry_unit' in s else True

            ens = pd.read_csv(path / s['energies'], index_col=s['lattice'])
            if s['is_ry_unit']:
                ens = ens * 13.605698066
            s['energies'] = ens

            if 'normalizer' in s:
                ens = pd.read_csv(path / s['normalizer']['energies'], index_col=s['lattice'])
                if s['is_ry_unit']:
                    ens = ens * 13.605698066
                s['normalizer']['energies'] = ens

            # remove unused parameter
            del s['lattice']
            del s['is_ry_unit']

    return inp


[docs]def parse_formula(formula):
    """
    Args:
        formula (str): A string formula, e.g. Fe2O3, Li3Fe2(PO4)3

    Returns:
        Composition with that formula.

    Notes:
        In the case of Metallofullerene formula (e.g. Y3N@C80),
        the @ mark will be dropped and passed to parser.
    """
    # for Metallofullerene like "Y3N@C80"
    formula = formula.replace("@", "")

    def get_sym_dict(f, factor):
        sym_dict = collections.defaultdict(float)
        for m in re.finditer(r"([A-Z][a-z]*)\s*([-*\.\d]*)", f):
            el = m.group(1)
            amt = 1
            if m.group(2).strip() != "":
                amt = float(m.group(2))
            sym_dict[el] += amt * factor
            f = f.replace(m.group(), "", 1)
        if f.strip():
            raise RuntimeError("{} is an invalid formula!".format(f))
        return sym_dict

    m = re.search(r"\(([^\(\)]+)\)\s*([\.\d]*)", formula)
    if m:
        factor = 1
        if m.group(2) != "":
            factor = float(m.group(2))
        unit_sym_dict = get_sym_dict(m.group(1), factor)
        expanded_sym = "".join(["{}{}".format(el, amt) for el, amt in unit_sym_dict.items()])
        expanded_formula = formula.replace(m.group(), expanded_sym)
        return parse_formula(expanded_formula)
    return get_sym_dict(formula, 1)


[docs]@contextmanager
def cvm_context(**kwargs):
    """
    Set temp environment variable using ``with`` statement.

    Examples
    --------
    >>> import os
    >>> with cvm_context(simple_print='True'):
    >>>    print(os.getenv('simple_print'))
    True
    >>> print(os.getenv('simple_print'))
    None

    Parameters
    ----------
    kwargs: dict[str]
        Dict with string value.
    """
    import os

    tmp = dict()
    for k, v in kwargs.items():
        tmp[k] = os.getenv(k)
        os.environ[k] = v
    yield
    for k, v in tmp.items():
        if not v:
            del os.environ[k]
        else:
            os.environ[k] = v


[docs]def logspace(start: float, end: float, num: int) -> np.ndarray:
    """Generate log scaled series.

    Parameters
    ----------
    start : float
        Start point.
    end : float.
        End point
    num : int
        Steps.

    Returns
    -------
    series: np.ndarray
    """

    curve_paras = [1, 8]
    base_lin = np.linspace(np.exp2(curve_paras[0]), np.exp2(curve_paras[1]), num)
    logs = np.log2(base_lin)
    div = (logs[1:] - logs[:-1]) / ((curve_paras[1] - curve_paras[0]) / (num - 1))

    step = ((end - start) / (num - 1)) * div

    sample = np.zeros(num)
    sample[0] = start
    for i, v in enumerate(step):
        sample[i + 1] = sample[i] + v

    return sample