Skip to content

Commit

Permalink
Merge pull request #42 from opqrst-chen/add-cyclers
Browse files Browse the repository at this point in the history
Add cyclers [ARBIN, NEWARE]
  • Loading branch information
agiamason authored Jul 4, 2024
2 parents 1f8a95d + c2131f8 commit c47cd6b
Show file tree
Hide file tree
Showing 20 changed files with 853 additions and 75 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,19 @@ batteryml download MATR /path/to/save/raw/data
batteryml preprocess MATR /path/to/save/raw/data /path/to/save/processed/data
```

### Run Cycler Preprocessing Scripts to process your data
If your data was recorded by a battery cycler such as ARBIN or NEWARE, you can use the following command to convert it into BatteryML's `BatteryData` format.

```bash
batteryml preprocess ARBIN /path/to/save/raw/data /path/to/save/processed/data --config /path/to/config/yaml/file
```

Due to variations in software versions and configurations, the data format and fields exported by the same cycler may differ. Therefore, we have added default processing configurations in the `/configs/cycler` directory to map raw data to target data fields. You can edit these default configurations as needed.

We currently support the `ARBIN` and `NEWARE` data formats. Support for the `Biologic`, `LANDT`, and `Indigo` formats is being integrated. If you encounter any issues when processing your cycler data, please submit an issue and attach a sample data file so that we can quickly add compatibility with your data format.



### Run training and/or inference tasks using config files

BatteryML supports using a simple config file to specify the training and inference process. We provided several examples in `configs`. For example, to reproduce the "variance" model for battery life prediction, run
Expand Down
12 changes: 9 additions & 3 deletions batteryml/preprocess/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from .download import DOWNLOAD_LINKS, download_file
from .preprocess_CALCE import CALCEPreprocessor
from .preprocess_HNEI import HNEIPreprocessor
Expand All @@ -7,8 +8,13 @@
from .preprocess_RWTH import RWTHPreprocessor
from .preprocess_SNL import SNLPreprocessor
from .preprocess_UL_PUR import UL_PURPreprocessor
from .preprocess_arbin import ARBINPreprocessor
from .preprocess_neware import NEWAREPreprocessor

logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')

SUPPORTED_SOURCES = [
'CALCE', 'HNEI', 'HUST', 'MATR', 'OX', 'RWTH', 'SNL', 'UL_PUR'
]
SUPPORTED_SOURCES = {
'DATASETS': ['CALCE', 'HNEI', 'HUST', 'MATR', 'OX', 'RWTH', 'SNL', 'UL_PUR'],
'CYCLERS': ['ARBIN', 'BATTERYARCHIVE', "BIOLOGIC", 'INDIGO', "LANDT", "MACCOR", 'NEWARE', 'NOVONIX']
}
42 changes: 30 additions & 12 deletions batteryml/preprocess/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Licensed under the MIT License.
# Copyright (c) Microsoft Corporation.

import os
import logging
from tqdm import tqdm
from typing import List
from pathlib import Path
Expand All @@ -14,22 +16,38 @@ def __init__(self,
self.silent = silent
self.output_dir = Path(output_dir)

def process(self, parentdir: str) -> List[BatteryData]:
def process(self, *args, **kwargs) -> List[BatteryData]:
"""Main logic for preprocessing data."""

def __call__(self, parentdir: str):
batteries = self.process(parentdir)
self.dump(batteries)
def __call__(self, *args, **kwargs):
process_batteries_num, skip_batteries_num = self.process(
*args, **kwargs)
if not self.silent:
self.summary(batteries)
print(f'Successfully processed {process_batteries_num} batteries.')
print(f'Skip processing {skip_batteries_num} batteries.')

def dump(self, batteries: List[BatteryData]):
if not self.silent:
batteries = tqdm(
batteries,
desc=f'Dump batteries to {str(self.output_dir)}')
for battery in batteries:
battery.dump(self.output_dir / f'{battery.cell_id}.pkl')
def check_processed_file(self, processed_file: str):
    """Report whether ``processed_file`` already has a usable pickle.

    Args:
        processed_file: Cell identifier; the expected output file is
            ``<output_dir>/<processed_file>.pkl``.

    Returns:
        bool: ``True`` when the pickle exists and is non-empty (the caller
        may skip this cell), ``False`` when it is missing or empty.
    """
    expected_pkl_path = os.path.join(
        self.output_dir, f"{processed_file}.pkl")
    try:
        # A single size lookup replaces the repeated exists()/getsize()
        # pairs and cannot crash if the file vanishes between two checks.
        size = os.path.getsize(expected_pkl_path)
    except OSError:
        # No readable output yet: the cell still has to be processed.
        # Return an explicit bool instead of falling through to None.
        return False

    if size > 0:
        logging.info(
            f'Skip processing {processed_file}, pkl file already exists and is not empty.')
        return True

    logging.info(
        f'Found empty pkl file for {processed_file}.')
    return False

# def dump(self, batteries: List[BatteryData]):
# if not self.silent:
# batteries = tqdm(
# batteries,
# desc=f'Dump batteries to {str(self.output_dir)}')
# for battery in batteries:
# battery.dump(self.output_dir / f'{battery.cell_id}.pkl')

# Write a single battery to `<output_dir>/<cell_id>.pkl` by calling the
# battery's own `dump` method; the file name comes from `battery.cell_id`.
def dump_single_file(self, battery: BatteryData):
battery.dump(self.output_dir / f'{battery.cell_id}.pkl')

# Print a one-line report with the number of entries in `batteries`.
def summary(self, batteries: List[BatteryData]):
print(f'Successfully processed {len(batteries)} batteries.')
6 changes: 3 additions & 3 deletions batteryml/preprocess/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def memory2str(mem):
print(f'[INFO] {filename} already exists. Skip it.')
return
with open(filename, 'wb') as f:
response = requests.get(url, stream=True)
response = requests.get(url, stream=True, verify=False)
if total_length is None:
total_length = response.headers.get('content-length')
if response.status_code != 200:
Expand All @@ -82,8 +82,8 @@ def memory2str(mem):
total_size = memory2str(total_length)
bar_format = (
f'Downloading {filename}'
'|{percentage:3.0f}%|{bar:20}|{desc}'
'[{elapsed}<{remaining}{postfix}]')
'|{percentage:3.0f}%|{bar:20}|{desc}'
'[{elapsed}<{remaining}{postfix}]')
if update_interval * chunk_size * 100 >= total_length:
update_interval = 1
with tqdm(total=total_length, bar_format=bar_format) as bar:
Expand Down
23 changes: 18 additions & 5 deletions batteryml/preprocess/preprocess_CALCE.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,22 @@

@PREPROCESSORS.register()
class CALCEPreprocessor(BasePreprocessor):
def process(self, parentdir) -> List[BatteryData]:
def process(self, parentdir, **kwargs) -> List[BatteryData]:
path = Path(parentdir)
raw_files = [Path(f) for f in path.glob('*.zip')]
cells = [f.stem for f in raw_files]
if not self.silent:
cells = tqdm(cells)
batteries = []

process_batteries_num = 0
skip_batteries_num = 0
for cell, raw_file in zip(cells, raw_files):
# judge whether to skip the processed file
whether_to_skip = self.check_processed_file(f'CALCE_{cell}')
if whether_to_skip == True:
skip_batteries_num += 1
continue

rawdatadir = raw_file.parent / cell
if not rawdatadir.exists():
if not self.silent:
Expand Down Expand Up @@ -95,7 +103,7 @@ def process(self, parentdir) -> List[BatteryData]:
if 'CX2_16' == cell.upper():
clean_cycles = clean_cycles[1:]

batteries.append(BatteryData(
battery = BatteryData(
cell_id=f'CALCE_{cell}',
form_factor='prismatic',
anode_material='graphite',
Expand All @@ -104,12 +112,17 @@ def process(self, parentdir) -> List[BatteryData]:
nominal_capacity_in_Ah=C,
max_voltage_limit_in_V=4.2,
min_voltage_limit_in_V=2.7
))
)
self.dump_single_file(battery)
process_batteries_num += 1

if not self.silent:
tqdm.write(f'File: {battery.cell_id} dumped to pkl file')

# Remove the inflated directory
shutil.rmtree(rawdatadir)

return batteries
return process_batteries_num, skip_batteries_num


@njit
Expand Down
21 changes: 17 additions & 4 deletions batteryml/preprocess/preprocess_HNEI.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,21 @@

@PREPROCESSORS.register()
class HNEIPreprocessor(BasePreprocessor):
def process(self, parent_dir) -> List[BatteryData]:
def process(self, parent_dir, **kwargs) -> List[BatteryData]:
path = Path(parent_dir)
cells = set(
x.stem.split('_timeseries')[0]
for x in path.glob('*HNEI*timeseries*'))

batteries = []
process_batteries_num = 0
skip_batteries_num = 0
for cell in tqdm(cells, desc='Processing HNEI cells'):
# judge whether to skip the processed file
whether_to_skip = self.check_processed_file(cell)
if whether_to_skip == True:
skip_batteries_num += 1
continue

timeseries_file = next(path.glob(f'*{cell}*timeseries*'))
cycle_data_file = next(path.glob(f'*{cell}*cycle_data*'))
timeseries_df = pd.read_csv(timeseries_file)
Expand All @@ -32,8 +39,14 @@ def process(self, parent_dir) -> List[BatteryData]:
timeseries_df, _ = clean_cell(
timeseries_df, cycle_data_df, shifts=18)
# Capacity is stated here: (https://www.mdpi.com/1996-1073/11/5/1031)
batteries.append(organize_cell(timeseries_df, cell, 2.8))
return batteries
battery = organize_cell(timeseries_df, cell, 2.8)
self.dump_single_file(battery)
process_batteries_num += 1

if not self.silent:
tqdm.write(f'File: {battery.cell_id} dumped to pkl file')

return process_batteries_num, skip_batteries_num


def organize_cell(timeseries_df, name, C):
Expand Down
28 changes: 21 additions & 7 deletions batteryml/preprocess/preprocess_HUST.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

@PREPROCESSORS.register()
class HUSTPreprocessor(BasePreprocessor):
def process(self, parentdir) -> List[BatteryData]:
def process(self, parentdir, **kwargs) -> List[BatteryData]:
raw_file = Path(parentdir) / 'hust_data.zip'

with zipfile.ZipFile(raw_file, 'r') as zip_ref:
Expand All @@ -35,22 +35,31 @@ def process(self, parentdir) -> List[BatteryData]:
if not self.silent:
cell_files = tqdm(
cell_files, desc='Processing HUST cells')
batteries = []

process_batteries_num = 0
skip_batteries_num = 0
for cell_file in cell_files:
cell_id = cell_file.stem
cell_name = f'HUST_{cell_id}'

# judge whether to skip the processed file
whether_to_skip = self.check_processed_file(cell_name)
if whether_to_skip == True:
skip_batteries_num += 1
continue

with open(cell_file, 'rb') as fin:
cell_data = pickle.load(fin)[cell_id]['data']
cycles = []
for cycle in range(len(cell_data)):
df = cell_data[cycle+1]
df = cell_data[cycle + 1]
I = df['Current (mA)'].values / 1000. # noqa
t = df['Time (s)'].values
V = df['Voltage (V)'].values
Qd = calc_Q(I, t, is_charge=False)
Qc = calc_Q(I, t, is_charge=True)
cycles.append(CycleData(
cycle_number=cycle+1,
cycle_number=cycle + 1,
voltage_in_V=V.tolist(),
current_in_A=I.tolist(),
time_in_s=t.tolist(),
Expand All @@ -62,7 +71,7 @@ def process(self, parentdir) -> List[BatteryData]:
# Skip first problematic cycles
if cell_name == 'HUST_7-5':
cycles = cycles[2:]
batteries.append(BatteryData(
battery = BatteryData(
cell_id=cell_name,
cycle_data=cycles,
form_factor='cylindrical_18650',
Expand Down Expand Up @@ -103,12 +112,17 @@ def process(self, parentdir) -> List[BatteryData]:
],
min_voltage_limit_in_V=2.0,
max_voltage_limit_in_V=3.6
))
)
self.dump_single_file(battery)
process_batteries_num += 1

if not self.silent:
tqdm.write(f'File: {battery.cell_id} dumped to pkl file')

# Remove the inflated data
shutil.rmtree(datadir)

return batteries
return process_batteries_num, skip_batteries_num


# See https://www.rsc.org/suppdata/d2/ee/d2ee01676a/d2ee01676a1.pdf
Expand Down
39 changes: 25 additions & 14 deletions batteryml/preprocess/preprocess_MATR.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,18 @@

@PREPROCESSORS.register()
class MATRPreprocessor(BasePreprocessor):
def process(self, parentdir) -> List[BatteryData]:
def process(self, parentdir, **kwargs) -> List[BatteryData]:
raw_files = [
parentdir / 'MATR_batch_20170512.mat',
parentdir / 'MATR_batch_20170630.mat',
parentdir / 'MATR_batch_20180412.mat',
parentdir / 'MATR_batch_20190124.mat',
parentdir / 'MATR_batch_20170512.mat',
parentdir / 'MATR_batch_20170630.mat',
parentdir / 'MATR_batch_20180412.mat',
parentdir / 'MATR_batch_20190124.mat',
]

data_batches = []
if not self.silent:
raw_files = tqdm(raw_files)

for indx, f in enumerate(raw_files):
if hasattr(raw_files, 'set_description'):
raw_files.set_description(f'Loading {f.stem}')
Expand All @@ -35,7 +36,10 @@ def process(self, parentdir) -> List[BatteryData]:

data_batches.append(load_batch(f, indx+1))

return clean_batches(data_batches)
batteries_num = clean_batches(
data_batches, self.dump_single_file, self.silent)

return batteries_num


def load_batch(file, k):
Expand Down Expand Up @@ -100,13 +104,14 @@ def load_batch(file, k):
'cycle_life': cl,
'charge_policy': policy,
'summary': summary,
'cycles': cycle_dict}
'cycles': cycle_dict
}
key = f'b{k}c' + str(i)
bat_dict[key] = cell_dict
return bat_dict


def clean_batches(data_batches):
def clean_batches(data_batches, dump_single_file, silent):
# remove batteries that do not reach 80% capacity
# del data_batches[0]['b1c8']
# del data_batches[0]['b1c10']
Expand Down Expand Up @@ -142,12 +147,18 @@ def clean_batches(data_batches):
data_batches[0][bk]['cycles'][str(last_cycle + j)] = \
data_batches[1][batch2_keys[i]]['cycles'][jk]

cleaned = [
organize_cell(batch[cell], cell)
for batch in data_batches for cell in batch
if cell not in batch2_keys
]
return cleaned
process_batteries_num = 0
skip_batteries_num = 0
for batch in data_batches:
for cell in batch:
if cell not in batch2_keys:
battery = organize_cell(batch[cell], cell)
dump_single_file(battery)
if not silent:
tqdm.write(f'File: {battery.cell_id} dumped to pkl file')
process_batteries_num += 1

return process_batteries_num, skip_batteries_num


def organize_cell(data, name):
Expand Down
Loading

0 comments on commit c47cd6b

Please sign in to comment.