Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
PolyuWeldingSpock authored Sep 11, 2023
1 parent 62e503d commit 2dce142
Show file tree
Hide file tree
Showing 3 changed files with 395 additions and 0 deletions.
55 changes: 55 additions & 0 deletions step1-SCF.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash

#挑选随机结构,生成对应的POSCAR供VASP计算


cat << EOF > temp.py
import dpdata
import glob
import os
import numpy as np
import shutil
#使用dpdata处理数据,将lammps轨迹输入到dpdata中
data = dpdata.System('010.dump',fmt='lammps/dump')
print (data)
#确定要验证的结构数目,例:这里总步数10000steps,随机抽取两帧
frame_idx = np.random.choice(10000,size=2,replace=False)
#生成最初版本的POSCAR,此时的POSCAR元素种类显示错误
for idx in frame_idx:
data.to("vasp/POSCAR", "POSCAR_{}".format(idx), frame_idx=idx)
#更改元素种类
# Find all files that match the pattern 'POSCAR_*'
for filename in glob.glob('POSCAR_*'):
# Open the original file in read mode and a new file in write mode
with open(filename, 'r') as infile, open(filename + '_new', 'w') as outfile:
# Loop over each line in the file
for line in infile:
# Replace 'TYPE_0' with 'O' and 'TYPE_1' with 'I' in the line
new_line = line.replace('TYPE_0', 'O').replace('TYPE_1', 'H').replace('TYPE_2', 'C').replace('TYPE_3', 'N').replace('TYPE_4', 'Pb').replace('TYPE_5', 'I')
# Write the new line to the new file
outfile.write(new_line)
if not os.path.exists('POSCAR-VASP'):
os.makedirs('POSCAR-VASP')
for idx in frame_idx:
subdir = 'POSCAR-VASP/{}'.format(idx)
if not os.path.exists(subdir):
os.makedirs(subdir)
shutil.move("POSCAR_{}_new".format(idx), subdir)
EOF

python temp.py
rm temp.py

rm POSCAR_*
for f in POSCAR-VASP/*/POSCAR_*; do mv -- "$f" "${f%/*}/POSCAR"; done
for d in POSCAR-VASP/*/; do cp INCAR KPOINTS "$d"; done
119 changes: 119 additions & 0 deletions step2-energy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import re
import numpy as np
import glob
import os
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from math import sqrt


#######VASP#######

#匹配VASP OUTCAR中的体系总能量
dft_energy = r'energy without entropy= (\s+-?\d+\.\d+) energy\(sigma->0\) = (?P<energy>\s+-?\d+\.\d+)'

# 获取文件夹名作为数字的函数
def get_numeric_folder_name(path):
folder_name = os.path.basename(os.path.dirname(path))
return int(folder_name)

def vasp_energies(directory):
vasp_energies = {}

outcar_files = sorted(glob.glob(directory + '/**/OUTCAR', recursive=True), key=get_numeric_folder_name)

for outcar_file in outcar_files:
with open(outcar_file, 'r') as f:
for line in f:
match = re.search(dft_energy, line)
if match:
energy = match['energy']
step = get_numeric_folder_name(outcar_file)
vasp_energies[step] = float(energy)
return vasp_energies

#######VASP#######

#######LAMMPS#######

#匹配lammps输出的log.lammps中的能量字段
def lammps_energies(lammpslog_file):
energy_log = re.compile(
r"""
\s*(?P<step>\d+) # Step
\s+(?P<pot_e>[-]?\d+\.?\d*) # PotEng
\s+[-]?\d+\.?\d* # KinEng
\s+(?P<tot_e>[-]?\d+\.?\d*) # TotEng
\s+[-]?\d+\.?\d* # Temp
\s+[-]?\d+\.?\d* # Press
\s+[-]?\d+\.?\d* # Volume
\s*\n # Match any trailing whitespace until the newline
""",
re.VERBOSE
)
#储存进一个{}中
lammps_energies = {}
with open(lammpslog_file, 'r') as f:
content = f.read()
for match in energy_log.finditer(content):
step = int(match['step'])
pot_e = float(match['pot_e'])
lammps_energies[step] = pot_e

return lammps_energies

#######LAMMPS#######

#使用时需修改VASP和LAMMPS输出文件的路径以及原子个数
vasp_out = './VASP-confirm/'
vasp_energies = vasp_energies(vasp_out)
lammps_out = './lammps-thermo1/5ps/log.lammps'
lammps_energies = lammps_energies(lammps_out)
atoms = 222

print("VASP Energies:")
for step, energy in vasp_energies.items():
print(f"Step: {step}, Energy: {energy}")

# Normalize the energies to per atom
vasp_energies_per_atom = {k: v / atoms for k, v in vasp_energies.items()}
lammps_energies_per_atom = {k: v / atoms for k, v in lammps_energies.items()}

# ...

# 匹配VASP和LAMMPS步骤并提取能量
x = []
y = []
for step, vasp_energy in vasp_energies_per_atom.items():
lammps_energy = lammps_energies_per_atom.get(step + 1)
if lammps_energy is not None:
x.append(lammps_energy)
y.append(vasp_energy)

# 计算MSE和RMSE
mse = mean_squared_error(y, x)
rmse = sqrt(mse)

print(f"MSE: {mse}, RMSE: {rmse}")

# 生成散点图
plt.rcParams["font.family"] = "Arial"
plt.figure(figsize=(12, 12))
plt.scatter(x, y, color='orange',alpha=0.8)

# Add y=x line
lims = [np.min([plt.xlim(), plt.ylim()]), # min of both axes
np.max([plt.xlim(), plt.ylim()])] # max of both axes

plt.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
plt.xlim(lims)
plt.ylim(lims)

plt.xlabel('DPMD Energy (eV/atom)', fontsize=26)
plt.ylabel('DFT Energy (eV/atom)', fontsize=26)
plt.tick_params(axis='both', labelsize=22)
for axis in ['top','bottom','left','right']:
plt.gca().spines[axis].set_linewidth(2)

plt.savefig("energy.tiff", dpi=300)
plt.show()
221 changes: 221 additions & 0 deletions step3-force.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
import numpy as np
import os
import glob
import re
import matplotlib.pyplot as plt
from collections import defaultdict
from sklearn.metrics import mean_squared_error
import pandas as pd

#######VASP###########

def get_numeric_folder_name(path):
folder_name = os.path.basename(os.path.dirname(path))
return int(folder_name)

def vasp_forces(directory):
block_regex = re.compile(
r"""
POSITION\s+TOTAL-FORCE\s\(eV\/Angst\)\n
\s\-+\n
(?P<block>
(
\s+\d+\.\d+
\s+\d+\.\d+
\s+\d+\.\d+
\s+[\s-]\d+\.\d+
\s+[\s-]\d+\.\d+
\s+[\s-]\d+\.\d+
\n)+
)
""",
re.VERBOSE
)

line_regex = re.compile(
r"""
\s+(?P<x_cor>\d+\.\d+)
\s+(?P<y_cor>\d+\.\d+)
\s+(?P<z_cor>\d+\.\d+)
\s+(?P<x_force>[\s-]\d+\.\d+)
\s+(?P<y_force>[\s-]\d+\.\d+)
\s+(?P<z_force>[\s-]\d+\.\d+)
""",
re.VERBOSE
)

dft_forces = {}
outcar_files = sorted(glob.glob(directory + '/**/OUTCAR', recursive=True), key=get_numeric_folder_name)

for outcar_file in outcar_files:
with open(outcar_file, 'r') as f:
content = f.read()
for block_match in block_regex.finditer(content):
step = get_numeric_folder_name(outcar_file)
forces_for_step = []

for line_match in line_regex.finditer(block_match.group('block')):
x = line_match.group('x_cor')
y = line_match.group('y_cor')
z = line_match.group('z_cor')
fx = line_match.group('x_force')
fy = line_match.group('y_force')
fz = line_match.group('z_force')

forces_for_step.append({
'position': np.array([float(x), float(y), float(z)]),
'force': np.array([float(fx), float(fy), float(fz)])
})
dft_forces[step] = forces_for_step
num_steps = len(dft_forces)
print(f"There are {num_steps} steps in the DFT forces data.")
return dft_forces


#######VASP###########

#######LAMMPS##########

def lammps_forces(directory):
block_regex = re.compile(
r"""
ITEM\:\s+TIMESTEP\n
(?P<md_step>\d+)\n
ITEM\:\sNUMBER\sOF\sATOMS\n
\d+\n
ITEM\:\sBOX\sBOUNDS\spp\spp\spp\n
\d+\.\d+e[+-]\d+\s+\d+\.\d+e[+-]\d+\n
\d+\.\d+e[+-]\d+\s+\d+\.\d+e[+-]\d+\n
\d+\.\d+e[+-]\d+\s+\d+\.\d+e[+-]\d+\n
ITEM\:\sATOMS\sid\stype\selement\sx\sy\sz\sfx\sfy\sfz\n
(?P<block>
(
\d+
\s+\d+
\s+[A-Z][a-z]?
\s+-?\d+(\.\d+)?(e-?\d+)?
\s+-?\d+(\.\d+)?(e-?\d+)?
\s+-?\d+(\.\d+)?(e-?\d+)?
\s+-?\d+(\.\d+)?(e-?\d+)?
\s+-?\d+(\.\d+)?(e-?\d+)?
\s+-?\d+(\.\d+)?(e-?\d+)?
\n
)+
)
""",
re.VERBOSE
)

line_regex = re.compile(
r"""
(?P<ATOMS>\d+)
\s+\d+
\s+[A-Z][a-z]?
\s+(?P<x>-?\d+(\.\d+)?(e-?\d+)?)
\s+(?P<y>-?\d+(\.\d+)?(e-?\d+)?)
\s+(?P<z>-?\d+(\.\d+)?(e-?\d+)?)
\s+(?P<fx>-?\d+(\.\d+)?(e-?\d+)?)
\s+(?P<fy>-?\d+(\.\d+)?(e-?\d+)?)
\s+(?P<fz>-?\d+(\.\d+)?(e-?\d+)?)
""",
re.VERBOSE
)

# 创建一个字典,用于存储每个md_step的数据
md_forces = defaultdict(list)

with open(directory, 'r') as file:
content = file.read()

for block_match in block_regex.finditer(content):
md_step = int(block_match.group('md_step'))

# 将原子数据存储为一个字典的列表,然后按照 ATOMS 的值进行排序
atom_data = []
for line_match in line_regex.finditer(block_match.group('block')):
data = {
'ATOMS': int(line_match.group('ATOMS')),
'x': float(line_match.group('x')),
'y': float(line_match.group('y')),
'z': float(line_match.group('z')),
'fx': float(line_match.group('fx')),
'fy': float(line_match.group('fy')),
'fz': float(line_match.group('fz')),
}
atom_data.append(data)

# 按照 ATOMS 的值进行排序
atom_data.sort(key=lambda x: x['ATOMS'])

# 将排序后的数据添加到对应的md_step中
md_forces[md_step] = atom_data
num_md_steps = len(md_forces)
print(f"There are {num_md_steps} steps in the MD forces data.")

return md_forces

dft_forces = vasp_forces('./VASP-confirm/')
md_forces = lammps_forces('./lammps/010.dump')

def calculate_total_rmse(dft_forces, md_forces):
dft_all_forces = []
md_all_forces = []
for step in dft_forces.keys():
if step in md_forces.keys():
dft_forces_step = np.array([force_dict['force'] for force_dict in dft_forces[step]])
md_forces_step = np.array([np.array([force_dict['fx'], force_dict['fy'], force_dict['fz']]) for force_dict in md_forces[step]])

# Assert the shapes of arrays at each step
assert dft_forces_step.shape == md_forces_step.shape, f"Shapes of arrays are not consistent at step {step}!"

dft_all_forces.extend(dft_forces_step.flatten())
md_all_forces.extend(md_forces_step.flatten())

total_rmse = np.sqrt(mean_squared_error(dft_all_forces, md_all_forces))
return total_rmse


total_rmse = calculate_total_rmse(dft_forces, md_forces)
print(f"The total RMSE across all steps, atoms and directions is: {total_rmse}")

def plot_force_correlation(dft_forces, md_forces):
dft_all_forces = []
md_all_forces = []
for step in dft_forces.keys():
if step in md_forces.keys():
dft_forces_step = np.array([force_dict['force'] for force_dict in dft_forces[step]])
md_forces_step = np.array([np.array([force_dict['fx'], force_dict['fy'], force_dict['fz']]) for force_dict in md_forces[step]])

# Assert the shapes of arrays at each step
assert dft_forces_step.shape == md_forces_step.shape, f"Shapes of arrays are not consistent at step {step}!"

dft_all_forces.extend(dft_forces_step.flatten())
md_all_forces.extend(md_forces_step.flatten())


plt.figure(figsize=(10, 10))
plt.rcParams["font.family"] = "Arial"
plt.scatter(md_all_forces, dft_all_forces, s=1)
plt.xlabel('DPMD forces (eV/Angstrom)',fontsize=26)
plt.ylabel('DFT forces (eV/Angstrom)',fontsize=26)

lims = [np.min([plt.xlim(), plt.ylim()]), # min of both axes
np.max([plt.xlim(), plt.ylim()])] # max of both axes

plt.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
plt.xlim(lims)
plt.ylim(lims)

plt.tick_params(axis='both', labelsize=22)
for axis in ['top','bottom','left','right']:
plt.gca().spines[axis].set_linewidth(2)

plt.savefig("force.tiff", dpi=300)
plt.show()

plot_force_correlation(dft_forces, md_forces)





0 comments on commit 2dce142

Please sign in to comment.