Add files via upload

PolyuWeldingSpock · Sep 11, 2023 · 2dce142 · 2dce142
1 parent 62e503d
commit 2dce142
Show file tree

Hide file tree

Showing 3 changed files with 395 additions and 0 deletions.
diff --git a/step1-SCF.sh b/step1-SCF.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+#挑选随机结构，生成对应的POSCAR供VASP计算
+
+
+cat << EOF > temp.py
+
+import dpdata
+import glob
+import os
+import numpy as np
+import shutil
+
+#使用dpdata处理数据，将lammps轨迹输入到dpdata中
+data = dpdata.System('010.dump',fmt='lammps/dump')
+print (data)
+
+#确定要验证的结构数目，例：这里总步数10000steps，随机抽取两帧
+frame_idx = np.random.choice(10000,size=2,replace=False)
+
+#生成最初版本的POSCAR，此时的POSCAR元素种类显示错误
+for idx in frame_idx:
+    data.to("vasp/POSCAR", "POSCAR_{}".format(idx), frame_idx=idx)
+
+
+#更改元素种类
+# Find all files that match the pattern 'POSCAR_*'
+for filename in glob.glob('POSCAR_*'):
+    # Open the original file in read mode and a new file in write mode
+    with open(filename, 'r') as infile, open(filename + '_new', 'w') as outfile:
+        # Loop over each line in the file
+        for line in infile:
+            # Replace 'TYPE_0' with 'O' and 'TYPE_1' with 'I' in the line
+            new_line = line.replace('TYPE_0', 'O').replace('TYPE_1', 'H').replace('TYPE_2', 'C').replace('TYPE_3', 'N').replace('TYPE_4', 'Pb').replace('TYPE_5', 'I')
+            # Write the new line to the new file
+            outfile.write(new_line)
+
+if not os.path.exists('POSCAR-VASP'):
+    os.makedirs('POSCAR-VASP')
+
+for idx in frame_idx:
+    subdir = 'POSCAR-VASP/{}'.format(idx)
+    if not os.path.exists(subdir):
+        os.makedirs(subdir)
+    shutil.move("POSCAR_{}_new".format(idx), subdir)
+
+
+EOF
+
+python temp.py
+rm temp.py
+
+rm POSCAR_*
+for f in POSCAR-VASP/*/POSCAR_*; do mv -- "$f" "${f%/*}/POSCAR"; done
+for d in POSCAR-VASP/*/; do cp INCAR KPOINTS "$d"; done
diff --git a/step2-energy.py b/step2-energy.py
@@ -0,0 +1,119 @@
+import re
+import numpy as np
+import glob
+import os
+import matplotlib.pyplot as plt
+from sklearn.metrics import mean_squared_error
+from math import sqrt	
+
+
+#######VASP#######
+
+#匹配VASP OUTCAR中的体系总能量
+dft_energy = r'energy  without entropy= (\s+-?\d+\.\d+)  energy\(sigma->0\) = (?P<energy>\s+-?\d+\.\d+)'
+
+# 获取文件夹名作为数字的函数
+def get_numeric_folder_name(path):
+    folder_name = os.path.basename(os.path.dirname(path))
+    return int(folder_name)
+
+def vasp_energies(directory): 
+    vasp_energies = {}
+
+    outcar_files = sorted(glob.glob(directory + '/**/OUTCAR', recursive=True), key=get_numeric_folder_name)
+
+    for outcar_file in outcar_files:
+        with open(outcar_file, 'r') as f:
+            for line in f:
+                match = re.search(dft_energy, line)
+                if match:
+                    energy = match['energy']
+                    step = get_numeric_folder_name(outcar_file)
+                    vasp_energies[step] = float(energy)
+    return vasp_energies
+
+#######VASP#######
+
+#######LAMMPS#######
+
+#匹配lammps输出的log.lammps中的能量字段
+def lammps_energies(lammpslog_file):
+    energy_log = re.compile(
+    r"""
+        \s*(?P<step>\d+)            # Step
+        \s+(?P<pot_e>[-]?\d+\.?\d*) # PotEng
+        \s+[-]?\d+\.?\d*            # KinEng
+        \s+(?P<tot_e>[-]?\d+\.?\d*) # TotEng
+        \s+[-]?\d+\.?\d*            # Temp
+        \s+[-]?\d+\.?\d*            # Press
+        \s+[-]?\d+\.?\d*            # Volume
+        \s*\n                       # Match any trailing whitespace until the newline
+    """,
+    re.VERBOSE
+    )
+#储存进一个{}中
+    lammps_energies = {}
+    with open(lammpslog_file, 'r') as f:
+        content = f.read()
+        for match in energy_log.finditer(content):
+            step = int(match['step'])
+            pot_e = float(match['pot_e'])
+            lammps_energies[step] = pot_e
+
+    return lammps_energies
+
+#######LAMMPS#######
+
+#使用时需修改VASP和LAMMPS输出文件的路径以及原子个数
+vasp_out = './VASP-confirm/'
+vasp_energies = vasp_energies(vasp_out)
+lammps_out = './lammps-thermo1/5ps/log.lammps'
+lammps_energies = lammps_energies(lammps_out)
+atoms = 222
+
+print("VASP Energies:")
+for step, energy in vasp_energies.items():
+    print(f"Step: {step}, Energy: {energy}")
+
+# Normalize the energies to per atom
+vasp_energies_per_atom = {k: v / atoms for k, v in vasp_energies.items()}
+lammps_energies_per_atom = {k: v / atoms for k, v in lammps_energies.items()}
+
+# ...
+
+# 匹配VASP和LAMMPS步骤并提取能量
+x = []
+y = []
+for step, vasp_energy in vasp_energies_per_atom.items():
+    lammps_energy = lammps_energies_per_atom.get(step + 1)
+    if lammps_energy is not None:
+        x.append(lammps_energy)
+        y.append(vasp_energy)
+
+# 计算MSE和RMSE
+mse = mean_squared_error(y, x)
+rmse = sqrt(mse)
+
+print(f"MSE: {mse}, RMSE: {rmse}")
+
+# 生成散点图
+plt.rcParams["font.family"] = "Arial"
+plt.figure(figsize=(12, 12))
+plt.scatter(x, y, color='orange',alpha=0.8)
+
+# Add y=x line
+lims = [np.min([plt.xlim(), plt.ylim()]),  # min of both axes
+        np.max([plt.xlim(), plt.ylim()])]  # max of both axes
+
+plt.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
+plt.xlim(lims)
+plt.ylim(lims)
+
+plt.xlabel('DPMD Energy (eV/atom)', fontsize=26)
+plt.ylabel('DFT Energy (eV/atom)', fontsize=26)
+plt.tick_params(axis='both', labelsize=22)
+for axis in ['top','bottom','left','right']:
+  plt.gca().spines[axis].set_linewidth(2)
+
+plt.savefig("energy.tiff", dpi=300)
+plt.show()
diff --git a/step3-force.py b/step3-force.py
@@ -0,0 +1,221 @@
+import numpy as np
+import os
+import glob
+import re
+import matplotlib.pyplot as plt
+from collections import defaultdict
+from sklearn.metrics import mean_squared_error
+import pandas as pd
+
+#######VASP###########
+
+def get_numeric_folder_name(path):
+    folder_name = os.path.basename(os.path.dirname(path))
+    return int(folder_name)
+
+def vasp_forces(directory):
+    block_regex = re.compile(
+        r"""
+        POSITION\s+TOTAL-FORCE\s\(eV\/Angst\)\n
+        \s\-+\n
+        (?P<block>
+            (
+                \s+\d+\.\d+
+                \s+\d+\.\d+
+                \s+\d+\.\d+
+                \s+[\s-]\d+\.\d+
+                \s+[\s-]\d+\.\d+
+                \s+[\s-]\d+\.\d+
+            \n)+
+        )
+        """,
+        re.VERBOSE
+    )
+
+    line_regex = re.compile(
+        r"""
+        \s+(?P<x_cor>\d+\.\d+)
+        \s+(?P<y_cor>\d+\.\d+)
+        \s+(?P<z_cor>\d+\.\d+)
+        \s+(?P<x_force>[\s-]\d+\.\d+)
+        \s+(?P<y_force>[\s-]\d+\.\d+)
+        \s+(?P<z_force>[\s-]\d+\.\d+)
+        """,
+        re.VERBOSE
+    )
+
+    dft_forces = {}
+    outcar_files = sorted(glob.glob(directory + '/**/OUTCAR', recursive=True), key=get_numeric_folder_name)
+
+    for outcar_file in outcar_files:
+        with open(outcar_file, 'r') as f:
+            content = f.read()
+            for block_match in block_regex.finditer(content):
+                step = get_numeric_folder_name(outcar_file)
+                forces_for_step = []
+
+                for line_match in line_regex.finditer(block_match.group('block')):
+                    x = line_match.group('x_cor')
+                    y = line_match.group('y_cor')
+                    z = line_match.group('z_cor')
+                    fx = line_match.group('x_force')
+                    fy = line_match.group('y_force')
+                    fz = line_match.group('z_force')
+
+                    forces_for_step.append({
+                        'position': np.array([float(x), float(y), float(z)]),
+                        'force': np.array([float(fx), float(fy), float(fz)])
+                    })
+                dft_forces[step] = forces_for_step
+    num_steps = len(dft_forces)
+    print(f"There are {num_steps} steps in the DFT forces data.")
+    return dft_forces
+
+
+#######VASP###########
+
+#######LAMMPS##########
+
+def lammps_forces(directory):
+    block_regex = re.compile(
+        r"""
+        ITEM\:\s+TIMESTEP\n
+        (?P<md_step>\d+)\n
+        ITEM\:\sNUMBER\sOF\sATOMS\n
+        \d+\n
+        ITEM\:\sBOX\sBOUNDS\spp\spp\spp\n
+        \d+\.\d+e[+-]\d+\s+\d+\.\d+e[+-]\d+\n
+        \d+\.\d+e[+-]\d+\s+\d+\.\d+e[+-]\d+\n
+        \d+\.\d+e[+-]\d+\s+\d+\.\d+e[+-]\d+\n
+        ITEM\:\sATOMS\sid\stype\selement\sx\sy\sz\sfx\sfy\sfz\n
+        (?P<block>
+            (
+                \d+
+                \s+\d+
+                \s+[A-Z][a-z]?
+                \s+-?\d+(\.\d+)?(e-?\d+)?
+                \s+-?\d+(\.\d+)?(e-?\d+)?
+                \s+-?\d+(\.\d+)?(e-?\d+)?
+                \s+-?\d+(\.\d+)?(e-?\d+)?
+                \s+-?\d+(\.\d+)?(e-?\d+)?
+                \s+-?\d+(\.\d+)?(e-?\d+)?               
+                \n
+            )+
+        )
+        """,
+        re.VERBOSE
+    )
+
+    line_regex = re.compile(
+        r"""
+        (?P<ATOMS>\d+)
+        \s+\d+
+        \s+[A-Z][a-z]?
+        \s+(?P<x>-?\d+(\.\d+)?(e-?\d+)?)
+        \s+(?P<y>-?\d+(\.\d+)?(e-?\d+)?)
+        \s+(?P<z>-?\d+(\.\d+)?(e-?\d+)?)
+        \s+(?P<fx>-?\d+(\.\d+)?(e-?\d+)?)
+        \s+(?P<fy>-?\d+(\.\d+)?(e-?\d+)?)
+        \s+(?P<fz>-?\d+(\.\d+)?(e-?\d+)?)
+        """,
+        re.VERBOSE
+    )
+
+    # 创建一个字典，用于存储每个md_step的数据
+    md_forces = defaultdict(list)
+
+    with open(directory, 'r') as file:
+        content = file.read()
+
+    for block_match in block_regex.finditer(content):
+        md_step = int(block_match.group('md_step'))
+
+        # 将原子数据存储为一个字典的列表，然后按照 ATOMS 的值进行排序
+        atom_data = []
+        for line_match in line_regex.finditer(block_match.group('block')):
+            data = {
+                'ATOMS': int(line_match.group('ATOMS')),
+                'x': float(line_match.group('x')),
+                'y': float(line_match.group('y')),
+                'z': float(line_match.group('z')),
+                'fx': float(line_match.group('fx')),
+                'fy': float(line_match.group('fy')),
+                'fz': float(line_match.group('fz')),
+            }
+            atom_data.append(data)
+
+        # 按照 ATOMS 的值进行排序
+        atom_data.sort(key=lambda x: x['ATOMS'])
+
+        # 将排序后的数据添加到对应的md_step中
+        md_forces[md_step] = atom_data
+    num_md_steps = len(md_forces)
+    print(f"There are {num_md_steps} steps in the MD forces data.")
+
+    return md_forces
+
+dft_forces = vasp_forces('./VASP-confirm/')
+md_forces = lammps_forces('./lammps/010.dump')
+
+def calculate_total_rmse(dft_forces, md_forces):
+    dft_all_forces = []
+    md_all_forces = []
+    for step in dft_forces.keys():
+        if step in md_forces.keys():
+            dft_forces_step = np.array([force_dict['force'] for force_dict in dft_forces[step]])
+            md_forces_step = np.array([np.array([force_dict['fx'], force_dict['fy'], force_dict['fz']]) for force_dict in md_forces[step]])
+
+            # Assert the shapes of arrays at each step
+            assert dft_forces_step.shape == md_forces_step.shape, f"Shapes of arrays are not consistent at step {step}!"
+
+            dft_all_forces.extend(dft_forces_step.flatten())
+            md_all_forces.extend(md_forces_step.flatten())
+
+    total_rmse = np.sqrt(mean_squared_error(dft_all_forces, md_all_forces))
+    return total_rmse
+
+
+total_rmse = calculate_total_rmse(dft_forces, md_forces)
+print(f"The total RMSE across all steps, atoms and directions is: {total_rmse}")
+
+def plot_force_correlation(dft_forces, md_forces):
+    dft_all_forces = []
+    md_all_forces = []
+    for step in dft_forces.keys():
+        if step in md_forces.keys():
+            dft_forces_step = np.array([force_dict['force'] for force_dict in dft_forces[step]])
+            md_forces_step = np.array([np.array([force_dict['fx'], force_dict['fy'], force_dict['fz']]) for force_dict in md_forces[step]])
+
+            # Assert the shapes of arrays at each step
+            assert dft_forces_step.shape == md_forces_step.shape, f"Shapes of arrays are not consistent at step {step}!"
+
+            dft_all_forces.extend(dft_forces_step.flatten())
+            md_all_forces.extend(md_forces_step.flatten())
+
+
+    plt.figure(figsize=(10, 10))
+    plt.rcParams["font.family"] = "Arial"
+    plt.scatter(md_all_forces, dft_all_forces, s=1)
+    plt.xlabel('DPMD forces (eV/Angstrom)',fontsize=26)
+    plt.ylabel('DFT forces (eV/Angstrom)',fontsize=26)
+
+    lims = [np.min([plt.xlim(), plt.ylim()]),  # min of both axes
+        np.max([plt.xlim(), plt.ylim()])]  # max of both axes
+
+    plt.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
+    plt.xlim(lims)
+    plt.ylim(lims)
+
+    plt.tick_params(axis='both', labelsize=22)
+    for axis in ['top','bottom','left','right']:
+        plt.gca().spines[axis].set_linewidth(2)
+
+    plt.savefig("force.tiff", dpi=300)
+    plt.show()
+
+plot_force_correlation(dft_forces, md_forces)
+
+
+
+
+