Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Updated oxidation.py and its tests #366

Open
wants to merge 9 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ repos:
rev: v2.3.0
hooks:
- id: codespell
stages: [commit, commit-msg]
stages: [pre-commit, commit-msg]
args: [--toml, pyproject.toml]
additional_dependencies:
- tomli
Expand Down
109 changes: 109 additions & 0 deletions smact/tests/files/oxidation_states_icsd24_consensus.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#
# Oxidation state set
# Source: ICSD (2024), filtered for low commonality of reports
#
# Testing writing of ICSD 24 oxidation states list.
#
H -1 1
He
Li 1
Be 2
B -5 -3 -2 -1 1 2 3
C -4 -3 -2 -1 1 2 3 4
N -5 -4 -3 -2 -1 1 2 3 4 5
O -2 -1 2
F -1
Ne
Na 1
Mg 2
Al -3 3
Si -4 -3 -2 -1 1 2 3 4
P -4 -3 -2 -1 1 2 3 4 5
S -2 -1 1 2 3 4 5 6
Cl -1 1 3 4 5 7
Ar
K 1
Ca 1 2
Sc 1 2 3
Ti 1 2 3 4
V 1 2 3 4 5
Cr 1 2 3 4 5 6
Mn -1 1 2 3 4 5 6 7
Fe -4 1 2 3 4 5 6
Co -1 1 2 3 4 5
Ni 1 2 3 4
Cu 1 2 3 4
Zn 1 2 3 4
Ga -4 -3 1 2 3 4
Ge -4 -3 -2 -1 1 2 3 4
As -3 -2 -1 1 2 3 4 5
Se -2 -1 1 2 3 4 5 6
Br -1 1 3 5 7
Kr 2
Rb 1
Sr 2 4
Y 1 2 3 4
Zr 1 2 3 4
Nb 1 2 3 4 5 6
Mo -1 1 2 3 4 5 6 7
Tc 1 2 3 4 5 6 7
Ru -1 1 2 3 4 5 6 7
Rh -1 1 2 3 4 5
Pd 1 2 3 4
Ag 1 2 3
Cd 1 2
In -3 -2 -1 1 2 3
Sn -4 -3 -2 -1 1 2 3 4
Sb -3 -2 -1 1 2 3 4 5
Te -3 -2 -1 1 2 3 4 5 6
I -1 1 3 5 7
Xe 2 4 6 8
Cs 1
Ba 2
La 1 2 3 4
Ce 2 3 4
Pr 2 3 4
Nd 2 3 4 5
Pm 3
Sm 2 3 4
Eu 2 3 4 6
Gd 2 3 4
Tb 1 2 3 4
Dy 2 3 4
Ho 2 3
Er 2 3 4
Tm 2 3
Yb 2 3 4
Lu 2 3
Hf 2 3 4
Ta 1 2 3 4 5 6
W 2 3 4 5 6
Re 1 2 3 4 5 6 7
Os 1 2 3 4 5 6 7 8
Ir 1 2 3 4 5 6
Pt -2 1 2 3 4 5 6
Au -1 1 2 3 5
Hg 1 2
Tl -2 -1 1 2 3
Pb -4 -1 1 2 3 4
Bi -3 -2 -1 1 2 3 4 5
Po 4
At
Rn
Fr
Ra 2
Ac 3
Th 2 3 4 5
Pa 3 4 5
U 2 3 4 5 6
Np 2 3 4 5 6 7
Pu 2 3 4 5 6 7
Am 2 3 4 5 6
Cm 3 4
Bk 3 4
Cf 3
Es 3
Fm
Md
No
Lr
111 changes: 111 additions & 0 deletions smact/tests/files/oxidation_states_icsd24_consensus_w_0.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#
# Oxidation state set
# Source: ICSD (2024), filtered for low commonality of reports
#
# Testing writing of ICSD 24 oxidation states list.
#
# Includes oxidation state 0
#
H -1 0 1
He 0
Li 0 1
Be 0 2
B -5 -3 -2 -1 0 1 2 3
C -4 -3 -2 -1 0 1 2 3 4
N -5 -4 -3 -2 -1 0 1 2 3 4 5
O -2 -1 0 2
F -1 0
Ne 0
Na 0 1
Mg 0 2
Al -3 0 3
Si -4 -3 -2 -1 0 1 2 3 4
P -4 -3 -2 -1 0 1 2 3 4 5
S -2 -1 0 1 2 3 4 5 6
Cl -1 0 1 3 4 5 7
Ar 0
K 0 1
Ca 0 1 2
Sc 0 1 2 3
Ti 0 1 2 3 4
V 0 1 2 3 4 5
Cr 0 1 2 3 4 5 6
Mn -1 0 1 2 3 4 5 6 7
Fe -4 0 1 2 3 4 5 6
Co -1 0 1 2 3 4 5
Ni 0 1 2 3 4
Cu 0 1 2 3 4
Zn 0 1 2 3 4
Ga -4 -3 0 1 2 3 4
Ge -4 -3 -2 -1 0 1 2 3 4
As -3 -2 -1 0 1 2 3 4 5
Se -2 -1 0 1 2 3 4 5 6
Br -1 0 1 3 5 7
Kr 0 2
Rb 0 1
Sr 0 2 4
Y 0 1 2 3 4
Zr 0 1 2 3 4
Nb 0 1 2 3 4 5 6
Mo -1 0 1 2 3 4 5 6 7
Tc 0 1 2 3 4 5 6 7
Ru -1 0 1 2 3 4 5 6 7
Rh -1 0 1 2 3 4 5
Pd 0 1 2 3 4
Ag 0 1 2 3
Cd 0 1 2
In -3 -2 -1 0 1 2 3
Sn -4 -3 -2 -1 0 1 2 3 4
Sb -3 -2 -1 0 1 2 3 4 5
Te -3 -2 -1 0 1 2 3 4 5 6
I -1 0 1 3 5 7
Xe 0 2 4 6 8
Cs 0 1
Ba 0 2
La 0 1 2 3 4
Ce 0 2 3 4
Pr 0 2 3 4
Nd 0 2 3 4 5
Pm 3
Sm 0 2 3 4
Eu 0 2 3 4 6
Gd 0 2 3 4
Tb 0 1 2 3 4
Dy 0 2 3 4
Ho 0 2 3
Er 0 2 3 4
Tm 0 2 3
Yb 0 2 3 4
Lu 0 2 3
Hf 0 2 3 4
Ta 0 1 2 3 4 5 6
W 0 2 3 4 5 6
Re 0 1 2 3 4 5 6 7
Os 0 1 2 3 4 5 6 7 8
Ir 0 1 2 3 4 5 6
Pt -2 0 1 2 3 4 5 6
Au -1 0 1 2 3 5
Hg 0 1 2
Tl -2 -1 0 1 2 3
Pb -4 -1 0 1 2 3 4
Bi -3 -2 -1 0 1 2 3 4 5
Po 0 4
At
Rn
Fr
Ra 2
Ac 3
Th 0 2 3 4 5
Pa 0 3 4 5
U 0 2 3 4 5 6
Np 0 2 3 4 5 6 7
Pu 0 2 3 4 5 6 7
Am 0 2 3 4 5 6
Cm 0 3 4
Bk 0 3 4
Cf 0 3
Es 3
Fm
Md
No
Lr
98 changes: 75 additions & 23 deletions smact/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ def test_download_compounds_with_mp_api(self):


# Directory of reference fixture files, located next to this test module.
files_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "files")

# Reference ICSD 24 consensus oxidation-state lists, without and with oxidation state 0.
# Each name is bound exactly once: the earlier bindings to the
# "test_icsd_oxidation_states_filter_1000*" files were rebound on the very next
# lines and never read, so they were dead stores.
TEST_ICSD_OX_STATES = os.path.join(files_dir, "oxidation_states_icsd24_consensus.txt")
TEST_ICSD_OX_STATES_W_ZERO = os.path.join(files_dir, "oxidation_states_icsd24_consensus_w_0.txt")


class OxidationStatesTest(unittest.TestCase):
Expand All @@ -178,43 +178,95 @@ def setUp(self):
def test_oxidation_states_filter(self):
    """Check that ``filter`` returns an element / oxidation-state DataFrame.

    The unfiltered table on the filter object must be a DataFrame, and the
    filtered result must be a DataFrame with exactly the columns
    ``element`` and ``oxidation_state``.
    """
    self.assertIsInstance(self.ox_filter.ox_states_df, pd.DataFrame)

    threshold = 10
    # Single keyword call: the earlier positional ``filter(threshold)`` call
    # was redundant because its result was overwritten straight away.
    filtered_df = self.ox_filter.filter(consensus=threshold)

    self.assertIsInstance(filtered_df, pd.DataFrame)
    self.assertEqual(filtered_df.columns.tolist(), ["element", "oxidation_state"])

def test_oxidation_states_write(self):
    """Check that ``write`` reproduces the reference lists, without and with state 0.

    Each variant is written to ``<name>.txt`` relative to the current working
    directory and read back. The content must contain the comment passed to
    ``write`` and equal the expected text held on the test case
    (``self.test_ox_states`` / ``self.test_ox_states_w_zero``).
    """
    self.maxDiff = None  # show the complete diff when the contents differ

    comment = "Testing writing of ICSD 24 oxidation states list."
    cases = (
        ("test_ox_states", False, self.test_ox_states),
        ("test_ox_states_w_zero", True, self.test_ox_states_w_zero),
    )

    for filename, include_zero, expected in cases:
        path = f"{filename}.txt"
        with self.subTest(include_zero=include_zero):
            try:
                self.ox_filter.write(
                    filename,
                    comment=comment,
                    consensus=3,
                    include_zero=include_zero,
                    commonality="low",
                )

                self.assertTrue(os.path.exists(path))
                with open(path) as f:
                    content = f.read()
                # The user comment must appear in the written header.
                self.assertIn(comment, content)
                # The whole file must match the reference text exactly.
                self.assertEqual(content, expected)
            finally:
                # Remove the output even when an assertion fails, so a red
                # test does not leave files behind in the working directory.
                if os.path.exists(path):
                    os.remove(path)

def test_get_species_list(self):
    """Check that ``get_species_list`` returns a non-empty list for each option exercised.

    Every entry in ``option_sets`` is passed as keyword arguments. Each case
    runs in its own ``subTest`` so one failing combination does not hide the
    results of the others.
    """
    option_sets = (
        {},  # default parameters
        {"include_zero": True},
        {"include_one_oxidation_state": True},
        {"commonality": "low"},
        {"commonality": "medium"},
        {"commonality": "high"},
        {"consensus": 5},  # explicit consensus threshold
    )

    for options in option_sets:
        with self.subTest(**options):
            species_list = self.ox_filter.get_species_list(**options)
            self.assertIsInstance(species_list, list)
            self.assertGreater(len(species_list), 0)  # the list must not be empty

def test_oxidation_states_filter_species_occurrences(self):
    """Check the table returned by ``get_species_occurrences_df(consensus=1)``.

    The result must be a DataFrame with the four columns ``element``,
    ``species``, ``results_count`` and ``species_proportion (%)``, 490 rows,
    and ``O2-`` with 116910 results in the first row.
    """
    species_occurrences_df = self.ox_filter.get_species_occurrences_df(consensus=1)
    self.assertIsInstance(species_occurrences_df, pd.DataFrame)

    # Only the four-column expectation is kept. The stale two-column list used
    # to sit in the expected-value slot, which pushed the real expectation into
    # assertEqual's ``msg`` argument, and it contradicted the shape check below.
    self.assertEqual(
        species_occurrences_df.columns.tolist(),
        ["element", "species", "results_count", "species_proportion (%)"],
    )
    self.assertEqual(species_occurrences_df.shape, (490, 4))

    first_row = species_occurrences_df.iloc[0]
    self.assertEqual(first_row["species"], "O2-")
    self.assertEqual(first_row["results_count"], 116910)
10 changes: 7 additions & 3 deletions smact/utils/crystal_space/generate_composition_with_smact.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def generate_composition_with_smact(
# 2. generate all possible stoichiometric combinations
print("#2. Generating all possible stoichiometric combinations...")

pool = multiprocessing.Pool(processes=multiprocessing.cpu_count() if num_processes is None else num_processes)
pool = multiprocessing.Pool(processes=(multiprocessing.cpu_count() if num_processes is None else num_processes))
compounds = list(
tqdm(
pool.imap_unordered(
Expand Down Expand Up @@ -103,11 +103,15 @@ def generate_composition_with_smact(
] # omit elements without Pauling electronegativity (e.g., He, Ne, Ar, ...)
compounds_pauling = list(itertools.combinations(elements_pauling, num_elements))

pool = multiprocessing.Pool(processes=multiprocessing.cpu_count() if num_processes is None else num_processes)
pool = multiprocessing.Pool(processes=(multiprocessing.cpu_count() if num_processes is None else num_processes))
results = list(
tqdm(
pool.imap_unordered(
partial(smact_filter, threshold=max_stoich, oxidation_states_set=oxidation_states_set),
partial(
smact_filter,
threshold=max_stoich,
oxidation_states_set=oxidation_states_set,
),
compounds_pauling,
),
total=len(compounds_pauling),
Expand Down
Loading
Loading