Source code for ocelot.routines.disparser_functions

import itertools
import math
import re
import warnings
from copy import deepcopy

import networkx as nx
import numpy as np
from pymatgen.core.operations import SymmOp
from pymatgen.core.periodic_table import Element
from pymatgen.core.structure import PeriodicSite
from pymatgen.io.cif import CifFile
from pymatgen.io.cif import _get_cod_data
from pymatgen.io.cif import str2float
from pymatgen.io.cif import sub_spgrp
from pymatgen.symmetry.groups import SYMM_DATA
from pymatgen.symmetry.groups import SpaceGroup
from pymatgen.util.coord import pbc_shortest_vectors
from scipy.spatial.distance import cdist

from ocelot.routines.pbc import PBCparser


[docs]class CifFileError(Exception): pass
possible_symm_labels = [ "_symmetry_equiv_pos_as_xyz", "_symmetry_equiv_pos_as_xyz_", "_space_group_symop_operation_xyz", "_space_group_symop_operation_xyz_", "_symmetry_space_group_name_H-M", "_symmetry_space_group_name_H_M", "_symmetry_space_group_name_H-M_", "_symmetry_space_group_name_H_M_", "_space_group_name_Hall", "_space_group_name_Hall_", "_space_group_name_H-M_alt", "_space_group_name_H-M_alt_", "_symmetry_space_group_name_hall", "_symmetry_space_group_name_hall_", "_symmetry_space_group_name_h-m", "_symmetry_space_group_name_h-m_", "_space_group_IT_number", "_space_group_IT_number_", "_symmetry_Int_Tables_number", "_symmetry_Int_Tables_number_"] latt_labels = [ '_cell_length_a', '_cell_length_b', '_cell_length_c', '_cell_angle_alpha', '_cell_angle_beta', '_cell_angle_gamma', ] chemistry_labels = [ '_cell_formula_units_Z', '_chemical_formula_moiety', '_chemical_formula_sum' ] coord_labels = [ '_atom_site_label', '_atom_site_type_symbol', '_atom_site_fract_x', '_atom_site_fract_y', '_atom_site_fract_z' ] disorder_labels = [ '_atom_site_occupancy', '_atom_site_disorder_group', ] space_groups = {sub_spgrp(k): k for k in SYMM_DATA['space_group_encoding'].keys()} space_groups.update({sub_spgrp(k): k for k in SYMM_DATA['space_group_encoding'].keys()})
[docs]def get_pmg_dict(cifstring: str): """ use pmg dict to parse cifstring, only deal with one structure per file :param cifstring: :return: """ cifdata = CifFile.from_string(cifstring).data idnetifiers = list(cifdata.keys()) if len(idnetifiers) > 1: warnings.warn('W: find more than 1 structures in this cif file!') elif len(idnetifiers) == 0: warnings.warn('W: no structure found by pymatgen parser!') try: identifier = idnetifiers[0] except IndexError: raise CifFileError('no identifier found in the ciffile!') pymatgen_dict = list(cifdata.items())[0][1].data # jmol writes '_atom_site_type_symbol', but not '_atom_site_label' if '_atom_site_label' not in pymatgen_dict.keys(): warnings.warn('W: _atom_site_label not found in parsed dict') atom_site_label = [] symbols = pymatgen_dict['_atom_site_type_symbol'] for i in range(len(symbols)): s = symbols[i] atom_site_label.append('{}{}'.format(s, i)) pymatgen_dict['_atom_site_label'] = atom_site_label return identifier, pymatgen_dict
[docs]def apply_symmop(psites, ops): """ symmop and xyz in cif file: lets say xyz -- op1 --> x'y'z' and xyz -- op2 --> x!y!z! and it is possible to have x'y'z' is_close x!y!z! this means one should take only x'y'z' or x!y!z!, aka op1 is equivalent to op2 due to the symmetry implicated by xyz/the asymmectric unit, e.g. ALOVOO.cif -- Z=2, asymmectric unit given by cif is one molecule, but there're 4 ops so we need first check if the cif file behaves like this """ op_xyzs = [] for op in ops: n_xyzs = [] for ps in psites: new_coord = op.operate(ps.frac_coords) # new_coord = np.array([i - math.floor(i) for i in new_coord]) n_xyzs.append(new_coord) op_xyzs.append(n_xyzs) latt = psites[0].lattice def pbc_dist(fc1, fc2, lattice): v, d2 = pbc_shortest_vectors(lattice, fc1, fc2, return_d2=True) return math.sqrt(d2[0, 0]) def pbc_distmat(fcl1, fcl2): distmat = np.zeros((len(fcl1), len(fcl1))) for i in range(len(fcl1)): for j in range(i, len(fcl1)): distmat[i][j] = pbc_dist(fcl1[i], fcl2[j], latt) distmat[j][i] = distmat[i][j] return distmat def two_xyzs_close(xyzs1, xyzs2, tol=1e-5): dmat = pbc_distmat(xyzs1, xyzs2) almost_zeros = dmat[(dmat < tol)] if len(almost_zeros) > 0: return True return False op_identities = np.zeros((len(ops), len(ops)), dtype=bool) for i, j in itertools.combinations(range(len(ops)), 2): ixyzs = op_xyzs[i] jxyzs = op_xyzs[j] if two_xyzs_close(ixyzs, jxyzs): op_identities[i][j] = True op_identities[j][i] = True groups = [[0]] for i in range(len(ops)): for ig in range(len(groups)): if all(op_identities[i][j] for j in groups[ig]): groups[ig].append(i) if i not in [item for sublist in groups for item in sublist]: groups.append([i]) unique_ops = [ops[g[0]] for g in groups] new_psites = [] for ps in psites: iasym = 0 for op in unique_ops: new_coord = op.operate(ps.frac_coords) new_coord = np.array([i - math.floor(i) for i in new_coord]) new_properties = deepcopy(ps.properties) new_properties['iasym'] = iasym new_ps = PeriodicSite(ps.species_string, new_coord, ps.lattice, properties=deepcopy(new_properties)) new_psites.append(new_ps) iasym += 1 return new_psites, unique_ops
[docs]def braket2float(s): try: return float(s) except ValueError: if isinstance(s, str): return str2float(s) raise TypeError('cannot parse {} into float'.format(s))
[docs]def get_symmop(data): symops = [] for symmetry_label in ["_symmetry_equiv_pos_as_xyz", "_symmetry_equiv_pos_as_xyz_", "_space_group_symop_operation_xyz", "_space_group_symop_operation_xyz_"]: if data.get(symmetry_label): xyz = data.get(symmetry_label) if isinstance(xyz, str): msg = "A 1-line symmetry op P1 CIF is detected!" warnings.warn(msg) xyz = [xyz] try: symops = [SymmOp.from_xyz_string(s) for s in xyz] break except ValueError: continue if not symops: # Try to parse symbol for symmetry_label in ["_symmetry_space_group_name_H-M", "_symmetry_space_group_name_H_M", "_symmetry_space_group_name_H-M_", "_symmetry_space_group_name_H_M_", "_space_group_name_Hall", "_space_group_name_Hall_", "_space_group_name_H-M_alt", "_space_group_name_H-M_alt_", "_symmetry_space_group_name_hall", "_symmetry_space_group_name_hall_", "_symmetry_space_group_name_h-m", "_symmetry_space_group_name_h-m_"]: sg = data.get(symmetry_label) if sg: sg = sub_spgrp(sg) try: spg = space_groups.get(sg) if spg: symops = SpaceGroup(spg).symmetry_ops msg = "No _symmetry_equiv_pos_as_xyz type key found. " \ "Spacegroup from %s used." % symmetry_label warnings.warn(msg) break except ValueError: # Ignore any errors pass try: for d in _get_cod_data(): if sg == re.sub(r"\s+", "", d["hermann_mauguin"]): xyz = d["symops"] symops = [SymmOp.from_xyz_string(s) for s in xyz] msg = "No _symmetry_equiv_pos_as_xyz type key found. " \ "Spacegroup from %s used." % symmetry_label warnings.warn(msg) break except Exception: continue if symops: break if not symops: # Try to parse International number for symmetry_label in ["_space_group_IT_number", "_space_group_IT_number_", "_symmetry_Int_Tables_number", "_symmetry_Int_Tables_number_"]: if data.get(symmetry_label): try: i = int(braket2float(data.get(symmetry_label))) symops = SpaceGroup.from_int_number(i).symmetry_ops break except ValueError: continue if not symops: msg = "No _symmetry_equiv_pos_as_xyz type key found. " \ "Defaulting to P1." warnings.warn(msg) symops = [SymmOp.from_xyz_string(s) for s in ['x', 'y', 'z']] return symops
[docs]def get_connected_pblock(pblocks, cutoff=4.0): def get_coords(pb: [PeriodicSite]): coords = np.zeros((len(pb), 3)) for i in range(len(pb)): coords[i] = pb[i].coords return coords def get_shortest_distance_between_blocks(pb1, pb2): pb1coords = get_coords(pb1) pb2coords = get_coords(pb2) distmat = cdist(pb1coords, pb2coords) minid = np.unravel_index(np.argmin(distmat, axis=None), distmat.shape) return distmat[minid] def get_block_graph(distmat, cutoff): g = nx.Graph() for i in range(len(distmat)): g.add_node(i) for j in range(i + 1, len(distmat)): g.add_node(j) if distmat[i][j] < cutoff: g.add_edge(i, j) return g distmat_pblocks = np.zeros((len(pblocks), len(pblocks))) for i in range(len(pblocks)): for j in range(i + 1, len(pblocks)): distmat_pblocks[i][j] = get_shortest_distance_between_blocks(pblocks[i], pblocks[j]) distmat_pblocks[j][i] = distmat_pblocks[i][j] block_graph = get_block_graph(distmat_pblocks, cutoff) connected_block_ids = nx.connected_components(block_graph) # [[1,3], [2, 4, 5], ...] merged_blocks = [] for ids in connected_block_ids: merged_block = [] for i in ids: merged_block += pblocks[i] merged_blocks.append(merged_block) return merged_blocks
[docs]def find_connected_psites(psites: [PeriodicSite]): """ simplified version of percolation in unwrap """ latt = psites[0].lattice pindices = range(len(psites)) visited = [] block_list = [] while len(visited) != len(psites): # initialization unvisited = [idx for idx in pindices if idx not in visited] ini_idx = unvisited[0] block = [ini_idx] pointer = 0 while pointer != len(block): outside = [idx for idx in pindices if idx not in block and idx not in visited] for i in outside: distance, fctrans = PBCparser.get_dist_and_trans(latt, psites[block[pointer]]._frac_coords, psites[i]._frac_coords, ) cutoff = Element(psites[block[pointer]].species_string).atomic_radius + Element( psites[i].species_string).atomic_radius cutoff *= 1.3 if distance < cutoff: block.append(i) visited.append(block[pointer]) pointer += 1 block_list.append(block) return [[psites[j] for j in b] for b in block_list]