Source code for HTMACat.descriptor.Construct_descriptor_coad

from HTMACat.Extract_info import *
from ase.build import molecule
from HTMACat.configuration import base_info


[docs]def Construct_descriptor_coad(poscar, feature_surf, feature_ads, feature_site): """Construct a descriptor for a catalyst-adsorbate system. Parameters ---------- poscar : str The POSCAR file path of the catalyst-adsorbate system. feature_surf : list A list of features of the catalyst surface for constructing the descriptor. The features include: - surface valence electron - surface atomic radius - surf+subsurf mean valence electron - surf+subsurf mean atomic radius feature_ads : list A list of features of the adsorbate for constructing the descriptor. The features include: - mean enegativity - mean valence_electron feature_site : list A list of features of the binding site for constructing the descriptor. The features include: - mean valence electron - mean atomic radius - binding type Returns ------- descriptor : numpy.ndarray A descriptor array for the catalyst-adsorbate system, including the following features: - descriptor_surf: the descriptor of the catalyst surface, including the features specified in `feature_surf`. - descriptor_ads: the descriptor of the adsorbate, including the features specified in `feature_ads`. - descriptor_site: the descriptor of the binding site, including the features specified in `feature_site`. - descriptor_distance: the distance of adsorbed species. Notes ----- This function first extracts the atom types and coordinates from the POSCAR file of the catalyst-adsorbate system, and then distinguishes the adsorbate, surface atoms, and subsurface atoms. The features of the surface and subsurface atoms are calculated based on the `feature_surf` input list, while the features of the adsorbate and binding site are calculated based on the `feature_ads` and `feature_site` input lists, respectively. If multiple adsorbates are found in the system, the descriptors are constructed for each adsorbate and then combined into a single descriptor. If no adsorbate is found, an error message will be printed. """ descriptor = [] descriptor_surf = [] typ = {None: 0, "top": 1, "bri": 2, "fcc": 3, "hcp": 3, "4-fold": 4} typ2 = {None: 0, "top": 0, "bri": 0, "fcc": 0, "hcp": 1, "4-fold": 0} ## the features of catalyst surface : surfce valence electron,surface atomic radius, surf+subsurf mean valence electron,surf+subsurf mean atomic radius # surfatoms,surfatoms_symb=distinguish_atom_binding(poscar, tol=0.03,layer='surf_atom') # subsurfatoms,subsurfatoms_symb=distinguish_atom_binding(poscar, tol=0.03,layer='subsurf_atom') ( adatoms, adatoms_symb, surfatoms, surfatoms_symb, subsurfatoms, subsurfatoms_symb, ) = distinguish_atom_binding(poscar, tol=0.05) feature_value_surf = Construct_descriptor_info(base_info, surfatoms_symb, feature_surf) feature_value_subsurf = Construct_descriptor_info(base_info, subsurfatoms_symb, feature_surf) ## Ignore structures without standard or integrated surface configuration if feature_value_surf != []: feature_value = np.hstack((feature_value_surf, feature_value_subsurf)) # print(feature_value) descriptor_surf = np.around(np.mean(feature_value, 0), 2) ## the feature of adspecies and binding sites ( bind_adatoms, bind_adatoms_symb, adspecie, bind_type_symb, bind_surfatoms, bind_surfatoms_symb, ) = get_binding_adatom(poscar) # print(bind_adatoms,bind_adatoms_symb,adspecie,bind_type_symb,bind_surfatoms,bind_surfatoms_symb) # print(adspecie,bind_type_symb,bind_surfatoms_symb) # print(bind_type_symb[0]) # print(adspecie) if adspecie == []: print(f"The molecule can not adsorb on the {poscar}!") elif len(adspecie) > 1: # print(f"More then 1 adspecie are found in {poscar}!") sys_ad = poscar.split("/")[1].split("_")[-3:-1] if sorted(sys_ad) != sorted(adspecie): print(f"Adsorbed configuration reconstruct on {poscar}!") else: ## construct the descriptor of adsorbate: mean enegativity, mean valence_electron descriptor_ads, descriptor_ads_tmp = [], [] for i, specie in enumerate(adspecie): ads = molecule(specie) ads_symb = ads.get_chemical_symbols() # print(ads_symb) feature_value_ads = Construct_descriptor_info(base_info, ads_symb, feature_ads) # print(feature_value_ads) descriptor_ads_tmp += [np.around(np.mean(feature_value_ads, 0), 2)] descriptor_ads = np.hstack((descriptor_ads_tmp[0], descriptor_ads_tmp[1])) ## construct the descriptor of site: mean valence electron, mean atomic radius, bind type descriptor_site = [] site_type, descriptor_site_tmp, descriptor_site_tmp2 = [], [], [] # typ={None:0,'top':1,'bri':2,'fcc':3,'hcp':3,'4-fold':4} # typ2={None:0,'top':0,'bri':0,'fcc':0,'hcp':1,'4-fold':0} for j, typ_site in enumerate(bind_type_symb): descriptor_site_tmp += [np.hstack((typ.get(typ_site), typ2.get(typ_site)))] feature_value_site = Construct_descriptor_info( base_info, bind_surfatoms_symb[j], feature_site ) descriptor_site_tmp += [np.around(np.mean(feature_value_site, 0), 2)] # print(len(descriptor_site_tmp)) descriptor_site_tmp2 += [ np.hstack((descriptor_site_tmp[0], descriptor_site_tmp[1])) ] # print(descriptor_site) descriptor_site = np.hstack((descriptor_site_tmp2[0], descriptor_site_tmp2[1])) # print(descriptor_site) ## construct the descriptor:distance of adsorbed species dis_symb, descriptor_distance = dis_symb_matrix, dis_matrix = get_distance_adatoms( poscar ) descriptor = np.hstack( (descriptor_surf, descriptor_ads, descriptor_site, descriptor_distance) ) # print(descriptor) return descriptor else: ## construct the descriptor of adsorbate: mean enegativity, mean valence_electron sys_ad = poscar.split("/")[1].split("_")[-3:-1] adspecie_tmp = sys_ad # print(adspecie_tmp) descriptor_ads, descriptor_ads_tmp = [], [] for i, specie in enumerate(adspecie_tmp): ads = molecule(specie) ads_symb = ads.get_chemical_symbols() # print(ads_symb) feature_value_ads = Construct_descriptor_info(base_info, ads_symb, feature_ads) # print(feature_value_ads) descriptor_ads_tmp += [np.around(np.mean(feature_value_ads, 0), 2)] descriptor_ads = np.hstack((descriptor_ads_tmp[0], descriptor_ads_tmp[1])) ## construct the descriptor of site: mean valence electron, mean atomic radius, bind type descriptor_site = [] site_type, descriptor_site_tmp, descriptor_site_tmp2 = [], [], [] for j, typ_site in enumerate(bind_type_symb): descriptor_site_tmp += [np.hstack((typ.get(typ_site), typ2.get(typ_site)))] feature_value_site = Construct_descriptor_info( base_info, bind_surfatoms_symb[j], feature_site ) descriptor_site_tmp += [np.around(np.mean(feature_value_site, 0), 2)] # print(len(descriptor_site_tmp)) descriptor_site_tmp2 += [ np.hstack((descriptor_site_tmp[0], descriptor_site_tmp[1])) ] # print(descriptor_site_tmp2) descriptor_site1 = descriptor_site_tmp2[0] # print(descriptor_site1) ## add the descriptor info about the physical adsorbed site site2_type = [0, 0] site2_surfatom_feature = [0 for i in range(len(feature_site))] # print(np.hstack((site2_type,site2_surfatom_feature))) if adspecie[0] == adspecie_tmp[0]: descriptor_site = np.hstack((descriptor_site1, site2_type, site2_surfatom_feature)) ## construct the descriptor:distance of adsorbed species descriptor_distance = [0] descriptor = np.hstack( (descriptor_surf, descriptor_ads, descriptor_site, descriptor_distance) ) # print(descriptor) # return descriptor elif adspecie[0] == adspecie_tmp[1]: descriptor_site = np.hstack((site2_type, site2_surfatom_feature, descriptor_site1)) ## construct the descriptor:distance of adsorbed species descriptor_distance = [0] descriptor = np.hstack( (descriptor_surf, descriptor_ads, descriptor_site, descriptor_distance) ) # print(descriptor) # return descriptor else: print(f"Adsorbed configuration changes on {poscar}!") else: print(f"Surface info of {poscar} can not be obtained ")
from HTMACat.Extract_info import * from HTMACat.Base_tools import * import os import numpy as np import operator if __name__ == "__main__": feature_surf = ["Valence_electron", "Atomic_radius"] # feature_surf=['Atomic_radius'] feature_ads = ["Enegativity", "Valence_electron"] feature_site = ["Valence_electron", "Atomic_radius"] # feature_site=['Atomic_radius'] adspecies = [ "NH3_OH", "NH3_O", "NH2_O", "NH2_OH", "NH2_H2O", "NH_O", "NH_OH", "NH_H2O", "N_O", "N_OH", "N_H2O", "N_N", "N_NO", "O_O", ] print("----------------------------------------") print("Construct descriptor starts:") print("1st step: Get the whole 'Descriptor+Ead'") file_des = open("descriptor-coad", "w+") file_all = open("descriptor-coad-all", "w+") file_log = open("descriptor-coad-log", "w+") EnerInfo = open("adsE_coad", "r+") des, des_all, des_tmp = [], [], [] for i, Ener in enumerate(EnerInfo): sys = Ener.split(",")[0] ene = Ener.split(",")[-1].strip() poscar = f"./{sys}/optmk/CONTCAR" # poscar= f'./{sys}/CONTCAR' if adspecies != []: sys_all = "_".join(sys.split("_")[-3:-1]) specie = {sys_all}.intersection(set(adspecies)) # print(set([sys_all]),set(adspecies),specie) if specie: print(sys) descriptor = Construct_descriptor_coad( poscar, feature_surf, feature_ads, feature_site ) if descriptor is None: tmp = np.hstack(([sys], ["None"], [ene])) for d in tmp: if d == tmp[-1]: file_all.write("%s\n" % d) else: file_all.write("%s\t" % d) else: tmp = np.hstack(([sys], descriptor, [ene])) for d in tmp: if d == tmp[-1]: file_all.write("%s\n" % d) else: file_all.write("%s\t" % d) # file_all.writelines('%s\n' %tmp) des_tmp += [np.hstack(([sys], descriptor, [ene]))] else: print(sys) descriptor = Construct_descriptor_coad(poscar, feature_surf, feature_ads, feature_site) if descriptor is None: tmp = np.hstack(([sys], ["None"], [ene])) for d in tmp: if d == tmp[-1]: file_all.write("%s\n" % d) else: file_all.write("%s\t" % d) else: tmp = np.hstack(([sys], descriptor, [ene])) for d in tmp: if d == tmp[-1]: file_all.write("%s\n" % d) else: file_all.write("%s\t" % d) # file_all.writelines('%s\n' %tmp) des_tmp += [tmp] # print(des_tmp[0]) print("2nd: Extrate the repeated values") ###Substrate the repeated items according to the feature list m, n = np.array(des_tmp).shape # Extrate the feature value des_feature = [des_tmp[j][1:-1] for j in range(m)] # The feaure list after substrate the repeat des_feature_tmp = np.array(list({tuple(t) for t in des_feature})) k, l = des_feature_tmp.shape # The threshold value Ecut of Ead: if Ead > Ecut, ignore it Ecut = 0.25 # Output the feature and energy if m > k: print("Subtrate Repeat Values Starts:") for i, d1 in enumerate(des_feature_tmp): for j, d2 in enumerate(des_tmp): if all(d1 == d2[1:-1]): tmp = np.hstack((d1, d2[-1])) tmp2 = np.hstack((d1, [d2[0].split("_")[0]])) ## output if float(tmp[-1]) < float(Ecut): # output des+ene for d in tmp: if d == tmp[-1]: file_des.write("%s\n" % d) else: file_des.write("%s\t" % d) # output des+type: des+'Au' for d in tmp2: if d == tmp2[-1]: file_log.write("%s\n" % d) else: file_log.write("%s\t" % d) else: print(f"Ignore the value {tmp[-1]} >= {Ecut}") break else: continue else: print("No Repeated Value") file_all.close() file_des.close() file_log.close() print("Subtrate Repeat Values End !") """print('---------Raw data---------') os.system('cat descriptor-all') print('---------Final data---------') os.system('cat descriptor')"""