Source code for HTMACat.descriptor.Construct_descriptor

from HTMACat.Extract_info import *
from HTMACat.configuration import base_info

def Construct_descriptor(poscar, feature_surf, feature_ads, feature_site, adspecies, facet="100"):
    """Construct a descriptor vector for one catalyst-adsorbate structure.

    Parameters
    ----------
    poscar : str
        Path of the POSCAR/CONTCAR file of the catalyst-adsorbate system.
    feature_surf : list
        Feature names looked up (through ``Construct_descriptor_info``) for
        the surface and the subsurface atoms.
    feature_ads : list
        Feature names looked up for the atoms of the adsorbate molecule.
    feature_site : list
        Feature names looked up for the surface atoms of the binding site.
    adspecies : list
        Names of the adsorbate species that are accepted. A structure whose
        detected adsorbate is not in this list is rejected.
    facet : str, optional
        Surface facet. ``"100"`` is encoded as 8 and ``"111"`` as 9; any other
        value is encoded as ``None``. The default is ``"100"``.

    Returns
    -------
    descriptor : numpy.ndarray or None
        1D array built by concatenating, in this order:

        * the per-feature means (rounded to 2 decimals) of the surface atoms
          followed by those of the subsurface atoms,
        * the facet code,
        * the per-feature means (rounded to 2 decimals) of the adsorbate atoms,
        * the per-feature means (rounded to 2 decimals) of the binding-site
          atoms,
        * two site-type codes: the first is 0/1/2/3/4 for
          None/top/bri/(fcc or hcp)/4-fold, the second is 1 only for hcp.

        ``None`` is returned (after printing a message) when the surface
        symmetry check fails, when the surface features cannot be obtained,
        or when the adsorbate is missing, ambiguous or not in ``adspecies``.
    """
    # Only the element symbols of the surface and subsurface layers are used.
    (
        _adatoms,
        _adatoms_symb,
        _surfatoms,
        surfatoms_symb,
        _subsurfatoms,
        subsurfatoms_symb,
    ) = distinguish_atom_binding(poscar, tol=0.05)

    # "NO" means the symmetry of the surface atoms is broken.
    if get_symmetry_surfatoms(poscar, tol=0.3) == "NO":
        print(f"The symmetry of surface atoms of {poscar} could not be keeped!")
        return None

    feature_value_surf = np.array(
        Construct_descriptor_info(base_info, surfatoms_symb, feature_surf)
    )
    feature_value_subsurf = np.array(
        Construct_descriptor_info(base_info, subsurfatoms_symb, feature_surf)
    )

    # Validate the shapes BEFORE using them: an empty feature list gives a 1D
    # array, and unpacking its shape into two values would raise ValueError
    # instead of reaching this "surface info missing" branch.
    # A different number of surface and subsurface rows means that a large
    # surface reconstruction occurred and broke the surface.
    surface_is_usable = (
        feature_value_surf.ndim == 2
        and feature_value_subsurf.ndim == 2
        and feature_value_surf.shape[0] > 0
        and feature_value_surf.shape[0] == feature_value_subsurf.shape[0]
    )
    if not surface_is_usable:
        print(f"Surface info of {poscar} can not be obtained ")
        return None

    # Surface part: column-wise mean over the atoms, surface columns first,
    # subsurface columns second, then the facet code.
    feature_value = np.hstack((feature_value_surf, feature_value_subsurf))
    descriptor_surf_tmp = np.around(np.mean(feature_value, 0), 2)
    facet_coord = {"100": 8, "111": 9}
    descriptor_surf = np.hstack((descriptor_surf_tmp, [facet_coord.get(facet)]))

    # Adsorbate and binding-site information.
    (
        _bind_adatoms,
        _bind_adatoms_symb,
        adspecie,
        bind_type_symb,
        _bind_surfatoms,
        bind_surfatoms_symb,
    ) = get_binding_adatom(poscar)

    if len(adspecie) == 0:
        print(f"The molecule can not adsorb in the {poscar}!")
        return None
    if len(adspecie) > 1:
        print(f"More than 1 adspecie are found in {poscar}!")
        return None
    if not set(adspecie).intersection(set(adspecies)):
        print(adspecie)
        print(f"{poscar} can not be identified!")
        return None

    # Adsorbate part: mean of the requested features over the molecule atoms.
    ads = molecule(adspecie[0])
    ads_symb = ads.get_chemical_symbols()
    feature_value_ads = Construct_descriptor_info(base_info, ads_symb, feature_ads)
    descriptor_ads = np.around(np.mean(feature_value_ads, 0), 2)

    # Site part: mean of the requested features over the site atoms, followed
    # by the two site-type codes (fcc and hcp share code 3 in the first table
    # and are told apart by the second one).
    typ = {None: 0, "top": 1, "bri": 2, "fcc": 3, "hcp": 3, "4-fold": 4}
    typ2 = {None: 0, "top": 0, "bri": 0, "fcc": 0, "hcp": 1, "4-fold": 0}
    site_type = np.hstack((typ.get(bind_type_symb[0]), typ2.get(bind_type_symb[0])))
    feature_value_site = Construct_descriptor_info(
        base_info, bind_surfatoms_symb[0], feature_site
    )
    descriptor_site_tmp = np.around(np.mean(feature_value_site, 0), 2)
    descriptor_site = np.append(descriptor_site_tmp, site_type)

    return np.hstack((descriptor_surf, descriptor_ads, descriptor_site))
# NOTE: the import order is kept as in the original script so that the names
# bound last (os, np, operator) still win over anything the star imports export.
from HTMACat.Extract_info import *
from HTMACat.descriptor.Construct_descriptor import *
from HTMACat.Base_tools import *
import os
import numpy as np
import operator


def _write_row(handle, fields):
    """Write *fields* to *handle* as one tab-separated line.

    The line end is decided by position, not by comparing each field with the
    last one, so a field that happens to equal the last field no longer
    splits the row.
    """
    handle.write("\t".join(str(field) for field in fields) + "\n")


def main():
    """Build descriptors for every system listed in ``adsE_radical_all``.

    Each input line is comma separated: the first field is the system name
    (underscore-separated tokens, the third from the end being the doping
    type) and the last field is the adsorption energy.

    Output files (written in the current directory):

    * ``descriptor-all``: system, descriptor (or ``None``), energy.
    * ``descriptor``: de-duplicated descriptor + energy, only for energies
      below ``Ecut``.
    * ``descriptor-log``: the same descriptors followed by the first token of
      the system name.
    """
    feature_surf = ["Valence_electron", "Atomic_radius"]
    feature_ads = ["Valence_electron", "Atomic_radius"]
    feature_site = ["Valence_electron", "Atomic_radius"]
    adspecies = ["NH3", "NH2", "NH", "N", "O", "OH", "H"]
    dop_typ_all = ["1", "2", "3", "4", "1L"]
    facet = "111"
    # Threshold of the adsorption energy: rows with Ead >= Ecut are ignored.
    Ecut = 0.25

    allowed_species = set(adspecies)

    print("----------------------------------------")
    print("Construct descriptor starts:")
    print("1st step: Get the whole 'Descriptor+Ead'")

    # All four files are opened up front (as before) and are now guaranteed to
    # be closed, including the input file and the log file.
    with open("adsE_radical_all", "r") as ener_info, \
            open("descriptor", "w") as file_des, \
            open("descriptor-all", "w") as file_all, \
            open("descriptor-log", "w") as file_log:
        rows = []
        for line in ener_info:
            fields = line.split(",")
            system = fields[0]
            energy = fields[-1].strip()
            tokens = system.split("_")
            if len(tokens) < 3:
                # Blank or malformed line: there is no doping-type token.
                continue
            dop_typ = tokens[-3]
            if not allowed_species.intersection(tokens):
                continue
            if dop_typ not in dop_typ_all:
                continue

            poscar = f"./{system}/CONTCAR"
            print(system)
            descriptor = Construct_descriptor(
                poscar, feature_surf, feature_ads, feature_site, adspecies, facet=facet
            )
            if descriptor is None:
                _write_row(file_all, [system, "None", energy])
                continue

            # hstack turns every entry into a string: [system, features..., energy]
            row = np.hstack(([system], descriptor, [energy]))
            _write_row(file_all, row)
            rows.append(row)

        print("2nd: Extrate the repeated values")
        # m: number of rows, k: number of distinct feature vectors.
        m = len(rows)
        k = len({tuple(row[1:-1]) for row in rows})
        print(m, k)

        # NOTE(review): as in the original script, "descriptor" and
        # "descriptor-log" are only filled when duplicates exist (m > k);
        # confirm that leaving them empty otherwise is intended.
        if m > k:
            print("Subtrate Repeat Values Starts:")
            # Keep the first row of every distinct feature vector. Iterating
            # the rows in input order makes the output order reproducible.
            seen = set()
            for row in rows:
                features = tuple(row[1:-1])
                if features in seen:
                    continue
                seen.add(features)
                energy = row[-1]
                if float(energy) < float(Ecut):
                    # descriptor + energy
                    _write_row(file_des, [*features, energy])
                    # descriptor + first token of the system name, e.g. 'Au'
                    _write_row(file_log, [*features, row[0].split("_")[0]])
                else:
                    print(f"Ignore the value {energy} >= {Ecut}")
        else:
            print("No Repeated Value")

    print("Subtrate Repeat Values End !")
    # To inspect the results: `cat descriptor-all` (raw data) and
    # `cat descriptor` (final data).


if __name__ == "__main__":
    main()