From 33eef29a808f4c93e834c19a33ffb34bebc8384a Mon Sep 17 00:00:00 2001 From: GCR-1178 Date: Wed, 3 Jul 2024 09:35:05 +0000 Subject: [PATCH 1/2] add cycler [ARBIN, NEWARE] and dump single file --- README.md | 13 ++ batteryml/preprocess/__init__.py | 12 +- batteryml/preprocess/base.py | 42 +++- batteryml/preprocess/preprocess_CALCE.py | 23 +- batteryml/preprocess/preprocess_HNEI.py | 21 +- batteryml/preprocess/preprocess_HUST.py | 28 ++- batteryml/preprocess/preprocess_MATR.py | 39 ++-- batteryml/preprocess/preprocess_OX.py | 22 +- batteryml/preprocess/preprocess_RWTH.py | 28 ++- batteryml/preprocess/preprocess_SNL.py | 24 +- batteryml/preprocess/preprocess_UL_PUR.py | 24 +- batteryml/preprocess/preprocess_arbin.py | 226 +++++++++++++++++++ batteryml/preprocess/preprocess_neware.py | 262 ++++++++++++++++++++++ batteryml/utils/config.py | 16 +- bin/batteryml.py | 19 +- configs/cyclers/arbin.yaml | 37 +++ configs/cyclers/arbin_metadata.yaml | 42 ++++ configs/cyclers/neware.yaml | 42 ++++ 18 files changed, 848 insertions(+), 72 deletions(-) create mode 100644 batteryml/preprocess/preprocess_arbin.py create mode 100644 batteryml/preprocess/preprocess_neware.py create mode 100644 configs/cyclers/arbin.yaml create mode 100644 configs/cyclers/arbin_metadata.yaml create mode 100644 configs/cyclers/neware.yaml diff --git a/README.md b/README.md index 8d4c796..8687a75 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,19 @@ batteryml download MATR /path/to/save/raw/data batteryml preprocess MATR /path/to/save/raw/data /path/to/save/processed/data ``` +### Run Cycler Preprocessing Scripts to process your data +If your data is measured by a cycler such as ARBIN, NEWARE, etc., you can use this command to process your data into `BatteryData` of BatteryML. 
+ +```bash +batteryml preprocess ARBIN /path/to/save/raw/data /path/to/save/processed/data --config /path/to/config/yaml/file +``` + +Due to variations in software versions and configurations, the data format and fields exported by the same cycler may differ. Therefore, we have added default processing configurations in the `configs/cyclers` directory to map raw data to target data fields. You can edit these default configurations as needed. + +We currently support `ARBIN` and `NEWARE` data formats. Additionally, `Biologic`, `LANDT`, and `Indigo` formats are being integrated. If you encounter any issues with our cycler processing your data, please submit an issue and attach a sample data file to help us ensure rapid compatibility with your data format. + + + ### Run training and/or inference tasks using config files BatteryML supports using a simple config file to specify the training and inference process. We provided several examples in `configs`. For example, to reproduce the "variance" model for battery life prediction, run diff --git a/batteryml/preprocess/__init__.py b/batteryml/preprocess/__init__.py index 1447f86..2b78577 100644 --- a/batteryml/preprocess/__init__.py +++ b/batteryml/preprocess/__init__.py @@ -1,3 +1,4 @@ +import logging from .download import DOWNLOAD_LINKS, download_file from .preprocess_CALCE import CALCEPreprocessor from .preprocess_HNEI import HNEIPreprocessor @@ -7,8 +8,13 @@ from .preprocess_RWTH import RWTHPreprocessor from .preprocess_SNL import SNLPreprocessor from .preprocess_UL_PUR import UL_PURPreprocessor +from .preprocess_arbin import ARBINPreprocessor +from .preprocess_neware import NEWAREPreprocessor +logging.basicConfig(level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s') -SUPPORTED_SOURCES = [ - 'CALCE', 'HNEI', 'HUST', 'MATR', 'OX', 'RWTH', 'SNL', 'UL_PUR' -] \ No newline at end of file +SUPPORTED_SOURCES = { + 'DATASETS': ['CALCE', 'HNEI', 'HUST', 'MATR', 'OX', 'RWTH', 'SNL', 'UL_PUR'], + 'CYCLERS': 
['ARBIN', 'BATTERYARCHIVE', "BIOLOGIC", 'INDIGO', "LANDT", "MACCOR", 'NEWARE', 'NOVONIX'] +} diff --git a/batteryml/preprocess/base.py b/batteryml/preprocess/base.py index f6956cc..3f52c41 100644 --- a/batteryml/preprocess/base.py +++ b/batteryml/preprocess/base.py @@ -1,6 +1,8 @@ # Licensed under the MIT License. # Copyright (c) Microsoft Corporation. +import os +import logging from tqdm import tqdm from typing import List from pathlib import Path @@ -14,22 +16,38 @@ def __init__(self, self.silent = silent self.output_dir = Path(output_dir) - def process(self, parentdir: str) -> List[BatteryData]: + def process(self, *args, **kwargs) -> List[BatteryData]: """Main logic for preprocessing data.""" - def __call__(self, parentdir: str): - batteries = self.process(parentdir) - self.dump(batteries) + def __call__(self, *args, **kwargs): + process_batteries_num, skip_batteries_num = self.process( + *args, **kwargs) if not self.silent: - self.summary(batteries) + print(f'Successfully processed {process_batteries_num} batteries.') + print(f'Skip processing {skip_batteries_num} batteries.') - def dump(self, batteries: List[BatteryData]): - if not self.silent: - batteries = tqdm( - batteries, - desc=f'Dump batteries to {str(self.output_dir)}') - for battery in batteries: - battery.dump(self.output_dir / f'{battery.cell_id}.pkl') + def check_processed_file(self, processed_file: str): + expected_pkl_path = os.path.join( + self.output_dir, (f"{processed_file}.pkl")) + if os.path.exists(expected_pkl_path) and os.path.getsize(expected_pkl_path) > 0: + logging.info( + f'Skip processing {processed_file}, pkl file already exists and is not empty.') + return True + elif os.path.exists(expected_pkl_path) and os.path.getsize(expected_pkl_path) == 0: + logging.info( + f'Found empty pkl file for {processed_file}.') + return False + + # def dump(self, batteries: List[BatteryData]): + # if not self.silent: + # batteries = tqdm( + # batteries, + # desc=f'Dump batteries to 
{str(self.output_dir)}') + # for battery in batteries: + # battery.dump(self.output_dir / f'{battery.cell_id}.pkl') + + def dump_single_file(self, battery: BatteryData): + battery.dump(self.output_dir / f'{battery.cell_id}.pkl') def summary(self, batteries: List[BatteryData]): print(f'Successfully processed {len(batteries)} batteries.') diff --git a/batteryml/preprocess/preprocess_CALCE.py b/batteryml/preprocess/preprocess_CALCE.py index 6ada30d..70e70e6 100644 --- a/batteryml/preprocess/preprocess_CALCE.py +++ b/batteryml/preprocess/preprocess_CALCE.py @@ -21,14 +21,22 @@ @PREPROCESSORS.register() class CALCEPreprocessor(BasePreprocessor): - def process(self, parentdir) -> List[BatteryData]: + def process(self, parentdir, **kwargs) -> List[BatteryData]: path = Path(parentdir) raw_files = [Path(f) for f in path.glob('*.zip')] cells = [f.stem for f in raw_files] if not self.silent: cells = tqdm(cells) - batteries = [] + + process_batteries_num = 0 + skip_batteries_num = 0 for cell, raw_file in zip(cells, raw_files): + # judge whether to skip the processed file + whether_to_skip = self.check_processed_file(f'CALCE_{cell}') + if whether_to_skip == True: + skip_batteries_num += 1 + continue + rawdatadir = raw_file.parent / cell if not rawdatadir.exists(): if not self.silent: @@ -95,7 +103,7 @@ def process(self, parentdir) -> List[BatteryData]: if 'CX2_16' == cell.upper(): clean_cycles = clean_cycles[1:] - batteries.append(BatteryData( + battery = BatteryData( cell_id=f'CALCE_{cell}', form_factor='prismatic', anode_material='graphite', @@ -104,12 +112,17 @@ def process(self, parentdir) -> List[BatteryData]: nominal_capacity_in_Ah=C, max_voltage_limit_in_V=4.2, min_voltage_limit_in_V=2.7 - )) + ) + self.dump_single_file(battery) + process_batteries_num += 1 + + if not self.silent: + tqdm.write(f'File: {battery.cell_id} dumped to pkl file') # Remove the inflated directory shutil.rmtree(rawdatadir) - return batteries + return process_batteries_num, skip_batteries_num @njit 
diff --git a/batteryml/preprocess/preprocess_HNEI.py b/batteryml/preprocess/preprocess_HNEI.py index dbed1e2..06be062 100644 --- a/batteryml/preprocess/preprocess_HNEI.py +++ b/batteryml/preprocess/preprocess_HNEI.py @@ -15,14 +15,21 @@ @PREPROCESSORS.register() class HNEIPreprocessor(BasePreprocessor): - def process(self, parent_dir) -> List[BatteryData]: + def process(self, parent_dir, **kwargs) -> List[BatteryData]: path = Path(parent_dir) cells = set( x.stem.split('_timeseries')[0] for x in path.glob('*HNEI*timeseries*')) - batteries = [] + process_batteries_num = 0 + skip_batteries_num = 0 for cell in tqdm(cells, desc='Processing HNEI cells'): + # judge whether to skip the processed file + whether_to_skip = self.check_processed_file(cell) + if whether_to_skip == True: + skip_batteries_num += 1 + continue + timeseries_file = next(path.glob(f'*{cell}*timeseries*')) cycle_data_file = next(path.glob(f'*{cell}*cycle_data*')) timeseries_df = pd.read_csv(timeseries_file) @@ -32,8 +39,14 @@ def process(self, parent_dir) -> List[BatteryData]: timeseries_df, _ = clean_cell( timeseries_df, cycle_data_df, shifts=18) # Capacity is stated here: (https://www.mdpi.com/1996-1073/11/5/1031) - batteries.append(organize_cell(timeseries_df, cell, 2.8)) - return batteries + battery = organize_cell(timeseries_df, cell, 2.8) + self.dump_single_file(battery) + process_batteries_num += 1 + + if not self.silent: + tqdm.write(f'File: {battery.cell_id} dumped to pkl file') + + return process_batteries_num, skip_batteries_num def organize_cell(timeseries_df, name, C): diff --git a/batteryml/preprocess/preprocess_HUST.py b/batteryml/preprocess/preprocess_HUST.py index bfe9154..3c6176a 100644 --- a/batteryml/preprocess/preprocess_HUST.py +++ b/batteryml/preprocess/preprocess_HUST.py @@ -18,7 +18,7 @@ @PREPROCESSORS.register() class HUSTPreprocessor(BasePreprocessor): - def process(self, parentdir) -> List[BatteryData]: + def process(self, parentdir, **kwargs) -> List[BatteryData]: raw_file = 
Path(parentdir) / 'hust_data.zip' with zipfile.ZipFile(raw_file, 'r') as zip_ref: @@ -35,22 +35,31 @@ def process(self, parentdir) -> List[BatteryData]: if not self.silent: cell_files = tqdm( cell_files, desc='Processing HUST cells') - batteries = [] + + process_batteries_num = 0 + skip_batteries_num = 0 for cell_file in cell_files: cell_id = cell_file.stem cell_name = f'HUST_{cell_id}' + + # judge whether to skip the processed file + whether_to_skip = self.check_processed_file(cell_name) + if whether_to_skip == True: + skip_batteries_num += 1 + continue + with open(cell_file, 'rb') as fin: cell_data = pickle.load(fin)[cell_id]['data'] cycles = [] for cycle in range(len(cell_data)): - df = cell_data[cycle+1] + df = cell_data[cycle + 1] I = df['Current (mA)'].values / 1000. # noqa t = df['Time (s)'].values V = df['Voltage (V)'].values Qd = calc_Q(I, t, is_charge=False) Qc = calc_Q(I, t, is_charge=True) cycles.append(CycleData( - cycle_number=cycle+1, + cycle_number=cycle + 1, voltage_in_V=V.tolist(), current_in_A=I.tolist(), time_in_s=t.tolist(), @@ -62,7 +71,7 @@ def process(self, parentdir) -> List[BatteryData]: # Skip first problematic cycles if cell_name == 'HUST_7-5': cycles = cycles[2:] - batteries.append(BatteryData( + battery = BatteryData( cell_id=cell_name, cycle_data=cycles, form_factor='cylindrical_18650', @@ -103,12 +112,17 @@ def process(self, parentdir) -> List[BatteryData]: ], min_voltage_limit_in_V=2.0, max_voltage_limit_in_V=3.6 - )) + ) + self.dump_single_file(battery) + process_batteries_num += 1 + + if not self.silent: + tqdm.write(f'File: {battery.cell_id} dumped to pkl file') # Remove the inflated data shutil.rmtree(datadir) - return batteries + return process_batteries_num, skip_batteries_num # See https://www.rsc.org/suppdata/d2/ee/d2ee01676a/d2ee01676a1.pdf diff --git a/batteryml/preprocess/preprocess_MATR.py b/batteryml/preprocess/preprocess_MATR.py index bb094a8..d386c15 100644 --- a/batteryml/preprocess/preprocess_MATR.py +++ 
b/batteryml/preprocess/preprocess_MATR.py @@ -15,17 +15,18 @@ @PREPROCESSORS.register() class MATRPreprocessor(BasePreprocessor): - def process(self, parentdir) -> List[BatteryData]: + def process(self, parentdir, **kwargs) -> List[BatteryData]: raw_files = [ - parentdir / 'MATR_batch_20170512.mat', - parentdir / 'MATR_batch_20170630.mat', - parentdir / 'MATR_batch_20180412.mat', - parentdir / 'MATR_batch_20190124.mat', + parentdir / 'MATR_batch_20170512.mat', + parentdir / 'MATR_batch_20170630.mat', + parentdir / 'MATR_batch_20180412.mat', + parentdir / 'MATR_batch_20190124.mat', ] data_batches = [] if not self.silent: raw_files = tqdm(raw_files) + for indx, f in enumerate(raw_files): if hasattr(raw_files, 'set_description'): raw_files.set_description(f'Loading {f.stem}') @@ -35,7 +36,10 @@ def process(self, parentdir) -> List[BatteryData]: data_batches.append(load_batch(f, indx+1)) - return clean_batches(data_batches) + batteries_num = clean_batches( + data_batches, self.dump_single_file, self.silent) + + return batteries_num def load_batch(file, k): @@ -100,13 +104,14 @@ def load_batch(file, k): 'cycle_life': cl, 'charge_policy': policy, 'summary': summary, - 'cycles': cycle_dict} + 'cycles': cycle_dict + } key = f'b{k}c' + str(i) bat_dict[key] = cell_dict return bat_dict -def clean_batches(data_batches): +def clean_batches(data_batches, dump_single_file, silent): # remove batteries that do not reach 80% capacity # del data_batches[0]['b1c8'] # del data_batches[0]['b1c10'] @@ -142,12 +147,18 @@ def clean_batches(data_batches): data_batches[0][bk]['cycles'][str(last_cycle + j)] = \ data_batches[1][batch2_keys[i]]['cycles'][jk] - cleaned = [ - organize_cell(batch[cell], cell) - for batch in data_batches for cell in batch - if cell not in batch2_keys - ] - return cleaned + process_batteries_num = 0 + skip_batteries_num = 0 + for batch in data_batches: + for cell in batch: + if cell not in batch2_keys: + battery = organize_cell(batch[cell], cell) + 
dump_single_file(battery) + if not silent: + tqdm.write(f'File: {battery.cell_id} dumped to pkl file') + process_batteries_num += 1 + + return process_batteries_num, skip_batteries_num def organize_cell(data, name): diff --git a/batteryml/preprocess/preprocess_OX.py b/batteryml/preprocess/preprocess_OX.py index da7d58b..fdbc319 100644 --- a/batteryml/preprocess/preprocess_OX.py +++ b/batteryml/preprocess/preprocess_OX.py @@ -14,21 +14,35 @@ @PREPROCESSORS.register() class OXPreprocessor(BasePreprocessor): - def process(self, parentdir) -> List[BatteryData]: + def process(self, parentdir, **kwargs) -> List[BatteryData]: path = Path(parentdir) cells = set( x.stem.split('_timeseries')[0] for x in path.glob('*timeseries*')) - batteries = [] + + process_batteries_num = 0 + skip_batteries_num = 0 for cell in tqdm(cells, desc='Processing OX cells'): + # judge whether to skip the processed file + whether_to_skip = self.check_processed_file(cell) + if whether_to_skip == True: + skip_batteries_num += 1 + continue + timeseries_file = next(path.glob(f'*{cell}*timeseries*')) timeseries_df = pd.read_csv(timeseries_file) # Nominal capacity is 740mAh, which leads to too short # cycle life. No batteries reach 0.74Ah, so we use 0.72Ah # to calculate the cycle life. 
# https://ora.ox.ac.uk/objects/uuid:03ba4b01-cfed-46d3-9b1a-7d4a7bdf6fac - batteries.append(organize_cell(timeseries_df, cell, 0.72)) - return batteries + battery = organize_cell(timeseries_df, cell, 0.72) + self.dump_single_file(battery) + process_batteries_num += 1 + + if not self.silent: + tqdm.write(f'File: {battery.cell_id} dumped to pkl file') + + return process_batteries_num, skip_batteries_num def organize_cell(timeseries_df, name, C): diff --git a/batteryml/preprocess/preprocess_RWTH.py b/batteryml/preprocess/preprocess_RWTH.py index c654c79..21b5f72 100644 --- a/batteryml/preprocess/preprocess_RWTH.py +++ b/batteryml/preprocess/preprocess_RWTH.py @@ -18,7 +18,7 @@ @PREPROCESSORS.register() class RWTHPreprocessor(BasePreprocessor): - def process(self, parentdir) -> List[BatteryData]: + def process(self, parentdir, **kwargs) -> List[BatteryData]: raw_file = Path(parentdir) / 'RWTH.zip' # Unzip the file first @@ -54,9 +54,18 @@ def process(self, parentdir) -> List[BatteryData]: cells = [f'{i:03}' for i in range(2, 50)] if not self.silent: cells = tqdm(cells) - batteries = [] + + process_batteries_num = 0 + skip_batteries_num = 0 for cell in cells: name = f'RWTH_{cell}' + + # judge whether to skip the processed file + whether_to_skip = self.check_processed_file(name) + if whether_to_skip == True: + skip_batteries_num += 1 + continue + if not self.silent: cells.set_description(f'Processing csv files for cell {name}') files = datadir.glob(f'*{cell}=ZYK*Zyk*.csv') @@ -90,13 +99,14 @@ def process(self, parentdir) -> List[BatteryData]: # Remove abnormal cycles Qds = np.array([max(x.discharge_capacity_in_Ah) for x in cycles]) to_remove = remove_abnormal_cycle(Qds) - cycles = [cycle for i, cycle in enumerate(cycles) if not to_remove[i]] + cycles = [cycle for i, cycle in enumerate( + cycles) if not to_remove[i]] # Organize cell # The nominal capacity is 2.05Ah, but due to quality issue, # approximately 1.85Ah each. 
Cycling between 20% to 80% SoC # makes its nominal capacity 1.85 * 0.6 = 1.11 Ah. # See https://publications.rwth-aachen.de/record/818642/files/Content_RWTH-2021-04545.pdf # noqa - batteries.append(BatteryData( + battery = BatteryData( cell_id=name, cycle_data=cycles, form_factor='cylindrical_18650', @@ -126,12 +136,18 @@ def process(self, parentdir) -> List[BatteryData]: min_voltage_limit_in_V=3.5, max_voltage_limit_in_V=3.9, max_current_limit_in_A=4 - )) + ) + + self.dump_single_file(battery) + process_batteries_num += 1 + + if not self.silent: + tqdm.write(f'File: {battery.cell_id} dumped to pkl file') # Remove the extracted files shutil.rmtree(subdir) - return batteries + return process_batteries_num, skip_batteries_num @njit diff --git a/batteryml/preprocess/preprocess_SNL.py b/batteryml/preprocess/preprocess_SNL.py index 02d9939..0d155fd 100644 --- a/batteryml/preprocess/preprocess_SNL.py +++ b/batteryml/preprocess/preprocess_SNL.py @@ -15,7 +15,7 @@ @PREPROCESSORS.register() class SNLPreprocessor(BasePreprocessor): - def process(self, parentdir) -> List[BatteryData]: + def process(self, parentdir, **kwargs) -> List[BatteryData]: path = Path(parentdir) cells = set( x.stem.split('_timeseries')[0] @@ -47,8 +47,17 @@ def process(self, parentdir) -> List[BatteryData]: 'SNL_18650_NCA_25C_40-60_0.5-0.5C_b', 'SNL_18650_NMC_25C_20-80_0.5-3C_b'] cells = tuple(cell for cell in cells if cell not in to_drop) - batteries = [] + + process_batteries_num = 0 + skip_batteries_num = 0 for cell in tqdm(cells, desc='Processing SNL cells'): + + # judge whether to skip the processed file + whether_to_skip = self.check_processed_file(cell) + if whether_to_skip == True: + skip_batteries_num += 1 + continue + timeseries_file = next(path.glob(f'*{cell}*timeseries*')) cycle_data_file = next(path.glob(f'*{cell}*cycle_data*')) timeseries_df = pd.read_csv(timeseries_file) @@ -59,8 +68,15 @@ def process(self, parentdir) -> List[BatteryData]: se = False timeseries_df, cycle_data_df = 
clean_snl_cell( timeseries_df, cycle_data_df, should_exclude=se) - batteries.append(organize_cell(timeseries_df, cell)) - return batteries + + battery = organize_cell(timeseries_df, cell) + self.dump_single_file(battery) + process_batteries_num += 1 + + if not self.silent: + tqdm.write(f'File: {battery.cell_id} dumped to pkl file') + + return process_batteries_num, skip_batteries_num def get_capacity(cell_name): diff --git a/batteryml/preprocess/preprocess_UL_PUR.py b/batteryml/preprocess/preprocess_UL_PUR.py index 5f768ac..4800afe 100644 --- a/batteryml/preprocess/preprocess_UL_PUR.py +++ b/batteryml/preprocess/preprocess_UL_PUR.py @@ -15,14 +15,21 @@ @PREPROCESSORS.register() class UL_PURPreprocessor(BasePreprocessor): - def process(self, parentdir: str) -> List[BatteryData]: + def process(self, parentdir: str, **kwargs) -> List[BatteryData]: path = Path(parentdir) cells = set( x.stem.split('_timeseries')[0] for x in path.glob('*UL-PUR_N*timeseries*')) - batteries = [] + process_batteries_num = 0 + skip_batteries_num = 0 for cell in tqdm(cells, desc='Processing UL-PUR cells'): + # judge whether to skip the processed file + whether_to_skip = self.check_processed_file(cell) + if whether_to_skip == True: + skip_batteries_num += 1 + continue + timeseries_file = next(path.glob(f'*{cell}*timeseries*')) cycle_data_file = next(path.glob(f'*{cell}*cycle_data*')) timeseries_df = pd.read_csv(timeseries_file) @@ -31,9 +38,16 @@ def process(self, parentdir: str) -> List[BatteryData]: continue timeseries_df, _ = clean_cell( timeseries_df, cycle_data_df, shifts=4) - batteries.append(organize_cell( - timeseries_df, cell, get_capacity(cell))) - return batteries + + battery = organize_cell( + timeseries_df, cell, get_capacity(cell)) + self.dump_single_file(battery) + process_batteries_num += 1 + + if not self.silent: + tqdm.write(f'File: {battery.cell_id} dumped to pkl file') + + return process_batteries_num, skip_batteries_num def get_capacity(cell_name): diff --git 
a/batteryml/preprocess/preprocess_arbin.py b/batteryml/preprocess/preprocess_arbin.py new file mode 100644 index 0000000..51db291 --- /dev/null +++ b/batteryml/preprocess/preprocess_arbin.py @@ -0,0 +1,226 @@ +# Licensed under the MIT License. +# Copyright (c) Microsoft Corporation. + +import os +import logging +import pandas as pd +from tqdm import tqdm +from typing import List +from pathlib import Path + +from batteryml.builders import PREPROCESSORS +from batteryml.utils import import_config +from batteryml.preprocess.base import BasePreprocessor +from batteryml import BatteryData, CycleData, CyclingProtocol + + +@PREPROCESSORS.register() +class ARBINPreprocessor(BasePreprocessor): + def process(self, parentdir, config_path, **kwargs) -> List[BatteryData]: + if config_path is None or str(config_path) == "None": + raise ValueError("Config path is not specified.") + else: + CONFIG_FIELDS = ["column_names", "data_types"] + CONVERSION_CONFIG = import_config(Path(config_path), CONFIG_FIELDS) + + cell_files = [f for f in Path(parentdir).iterdir( + ) if f.is_file() and not f.name.endswith('.yaml')] + + if not self.silent: + cell_files = tqdm( + cell_files, desc='Processing data from ARBIN cycler') + + process_batteries_num = 0 + skip_batteries_num = 0 + for cell_file in cell_files: + whether_to_skip = self.check_processed_file( + "ARBIN_" + cell_file.stem) + if whether_to_skip == True: + skip_batteries_num += 1 + continue + + logging.info(f'Processing cell_file: {cell_file.name}') + + battery = organize_cell_file(cell_file, CONVERSION_CONFIG) + self.dump_single_file(battery) + process_batteries_num += 1 + + if not self.silent: + logging.info(f'File: {battery.cell_id} dumped to pkl file') + + return process_batteries_num, skip_batteries_num + + +def organize_cell_file(cell_file, CONVERSION_CONFIG): + file_readers = { + '.csv': pd.read_csv, + '.xlsx': pd.read_excel, + '.xls': pd.read_excel + } + data = pd.DataFrame() + file_processed = False + cell_file_suffix = 
cell_file.suffix + try: + if cell_file_suffix in ['.xlsx', '.xls']: + sheets = {sheet_name: df for sheet_name, df in file_readers[cell_file_suffix]( + cell_file, sheet_name=None, index_col=0).items() if sheet_name != "Info"} + for sheet_df in sheets.values(): + data = pd.concat([data, sheet_df]) + file_processed = True + elif cell_file_suffix == '.csv': + data = file_readers[cell_file_suffix](cell_file, index_col=0) + file_processed = True + if not file_processed: + raise ValueError( + f"Unsupported file format: {cell_file.suffix}. Please provide a .csv, .xlsx, or .xls file.") + except Exception as e: + logging.error(f"Error processing file {cell_file}: {e}") + + columns = { + v: k for k, v in CONVERSION_CONFIG["column_names"].items() if v in data.columns} + data.rename(columns=columns, inplace=True) + + data_types = { + k: v for k, v in CONVERSION_CONFIG["data_types"].items() if k in data.columns} + data = data.astype(data_types) + + cycles = data_cycles(data) + + metadata_file_path = cell_file.with_suffix('.metadata.yaml') + metadata_file = metadata_file_path if os.path.exists( + metadata_file_path) else None + metadata = organize_metadata(metadata_file) + + return organize_cell(cell_file.stem, cycles, metadata) + + +def data_cycles(raw_data): + grouped_by_cycle_idx = raw_data.groupby('cycle_index') + columns_to_group_mapping = { + 'step_index': 'step_index', + 'current': 'I', + 'voltage': 'V', + 'charge_capacity': 'Qc', + 'discharge_capacity': 'Qd', + 'charge_energy': 'Ec', + 'discharge_energy': 'Ed', + 'temperature': 'T', + 'internal_resistance': 'IR', + 'test_time': 't', + 'date_time_iso': 'date_time_iso', + } + grouped_data = {} + grouped_data['data_point'] = grouped_by_cycle_idx.apply( + lambda x: (x.index + 1 - x.index[0]).tolist() + ) + for column in columns_to_group_mapping.keys(): + if column in raw_data.columns: + try: + grouped_data[column] = grouped_by_cycle_idx[column].apply(list) + except Exception as e: + logging.warning( + f'Failed to process 
column {column} to group: {e}') + else: + grouped_data[column] = grouped_by_cycle_idx.apply( + lambda x: [None]*len(x)) + + cycle_dict = {} + all_cycles = set(range(max(grouped_by_cycle_idx.groups.keys()) + 1)) + existing_cycles = set(grouped_by_cycle_idx.groups.keys()) + + missing_cycles = all_cycles - existing_cycles + for missing_cycle in missing_cycles: + logging.warning(f"Data of cycle {missing_cycle} missed.") + + for cdi, i in enumerate(grouped_by_cycle_idx.groups.keys()): + cd = {} + try: + cd['data_point'] = grouped_data['data_point'][i] + for field in columns_to_group_mapping.keys(): + if field == 'internal_resistance': + ##################################################################### + # Assume the last IR of each cycle is representative of that cycle. # + ##################################################################### + cd['IR'] = grouped_data[field][i][-1] + elif field == 'test_time': + min_date_time = min(grouped_data[field][i]) + cd['t'] = [ + time - min_date_time for time in grouped_data[field][i]] + else: + cd[columns_to_group_mapping[field]] = grouped_data[field][i] + except Exception as e: + logging.warning(f"Error processing field '{field}' in cycle {i}") + cycle_dict[str(cdi)] = cd + + return cycle_dict + + +# Need adjusting to custom metadata +def organize_metadata(meta_path): + METADATA_CONFIG_FIELDS = ["form_factor", "anode_material", "cathode_material", + "nominal_capacity_in_Ah", + "min_voltage_limit_in_V", "max_voltage_limit_in_V", + "charge_protocol", "discharge_protocol"] + METADATA_CONFIG = {field: None for field in METADATA_CONFIG_FIELDS} + + try: + if meta_path is None or str(meta_path) == "None": + raise ValueError("Metadata config path is not specified.") + config = import_config(Path(meta_path), METADATA_CONFIG_FIELDS) + METADATA_CONFIG.update(config) + except (ValueError, FileNotFoundError) as e: + logging.error(e) + + charge_protocols = [CyclingProtocol( + **cp) for cp in METADATA_CONFIG.get('charge_protocol', []) or 
[]] + discharge_protocols = [CyclingProtocol( + **dp) for dp in METADATA_CONFIG.get('discharge_protocol', []) or []] + + metadata = { + "form_factor": METADATA_CONFIG.get("form_factor"), + "anode_material": METADATA_CONFIG.get("anode_material"), + "cathode_material": METADATA_CONFIG.get("cathode_material"), + "charge_protocol": charge_protocols, + "discharge_protocol": discharge_protocols, + "nominal_capacity_in_Ah": METADATA_CONFIG.get("nominal_capacity_in_Ah"), + "min_voltage_limit_in_V": METADATA_CONFIG.get("min_voltage_limit_in_V"), + "max_voltage_limit_in_V": METADATA_CONFIG.get("max_voltage_limit_in_V") + } + return metadata + + +def organize_cell(name, cycles, metadata): + cycle_data = [] + for cycle_idx, cycle in cycles.items(): + # Skip the first cycle if it is necessary + # if int(cycle_idx) == 0: + # continue + cycle_data.append(CycleData( + cycle_number=cycle_idx, + voltage_in_V=cycle['V'], + current_in_A=cycle['I'], + charge_capacity_in_Ah=cycle['Qc'], + discharge_capacity_in_Ah=cycle['Qd'], + time_in_s=cycle['t'], + temperature_in_C=cycle['T'], + internal_resistance_in_ohm=cycle['IR'], + + energy_charge=cycle['Ec'], + energy_discharge=cycle['Ed'], + step_index=cycle['step_index'], + data_point=cycle['data_point'], + date_time_iso=cycle['date_time_iso'] + )) + + return BatteryData( + cell_id=f'ARBIN_{name}', + cycle_data=cycle_data, + form_factor=metadata["form_factor"], + anode_material=metadata["anode_material"], + cathode_material=metadata["cathode_material"], + charge_protocol=metadata["charge_protocol"], + discharge_protocol=metadata["discharge_protocol"], + nominal_capacity_in_Ah=metadata["nominal_capacity_in_Ah"], + min_voltage_limit_in_V=metadata["min_voltage_limit_in_V"], + max_voltage_limit_in_V=metadata["max_voltage_limit_in_V"] + ) diff --git a/batteryml/preprocess/preprocess_neware.py b/batteryml/preprocess/preprocess_neware.py new file mode 100644 index 0000000..1122a03 --- /dev/null +++ b/batteryml/preprocess/preprocess_neware.py @@ -0,0 
+1,262 @@ +# Licensed under the MIT License. +# Copyright (c) Microsoft Corporation. + +import os +import logging +import numpy as np +import pandas as pd +from tqdm import tqdm +from typing import List +from pathlib import Path + +from batteryml.builders import PREPROCESSORS +from batteryml.utils import import_config +from batteryml.preprocess.base import BasePreprocessor +from batteryml import BatteryData, CycleData, CyclingProtocol + + +@PREPROCESSORS.register() +class NEWAREPreprocessor(BasePreprocessor): + def process(self, parentdir, config_path, **kwargs) -> List[BatteryData]: + if config_path is None or str(config_path) == "None": + raise ValueError("Config path is not specified.") + else: + CONFIG_FIELDS = ["column_names", "data_types", "scales"] + CONVERSION_CONFIG = import_config(Path(config_path), CONFIG_FIELDS) + + cell_files = [f for f in Path(parentdir).iterdir( + ) if f.is_file() and not f.name.endswith('.yaml')] + + if not self.silent: + cell_files = tqdm( + cell_files, desc='Processing data from NEWARE cycler') + + process_batteries_num = 0 + skip_batteries_num = 0 + for cell_file in cell_files: + whether_to_skip = self.check_processed_file( + "NEWARE_"+cell_file.stem) + if whether_to_skip == True: + skip_batteries_num += 1 + continue + + logging.info(f'Processing cell_file: {cell_file.name}') + + battery = organize_cell_file(cell_file, CONVERSION_CONFIG) + self.dump_single_file(battery) + process_batteries_num += 1 + + if not self.silent: + logging.info(f'File: {battery.cell_id} dumped to pkl file') + + return process_batteries_num, skip_batteries_num + + +def organize_cell_file(cell_file, CONVERSION_CONFIG): + ir_column_name = '"DCIR(O)"' + + record_data = [] + with open(cell_file, encoding="ISO-8859-1") as input: + cycle_header = input.readline().replace("\t", "") + step_header = input.readline().replace("\t", "") + ir_index = step_header.split(",").index(ir_column_name) + record_header = input.readline().replace("\t", "").split(",") + 
record_header[0] = cycle_header.split(",")[0] + record_header[1] = step_header.split(",")[1] + record_header[22] = ir_column_name + record_header = ",".join(record_header) + record_header = record_header.encode("ascii", "ignore").decode() + + cycle_number = 0 + step_number = 0 + ir_value = None + for line in input: + if line[:2] == r',"': # step data + step_number = line.split(",")[1] + ir_value = line.split(",")[ir_index] + elif line[:2] == r",,": # record data + line_list = line.split(",") + line_list[0] = cycle_number + line_list[1] = step_number + line_list[22] = ir_value + record_data.append(line_list) + else: # cycle data + cycle_number = line.split(",")[0] + + cleaned_columns = [col.replace('"', '') + for col in record_header.split(",")] + record_df = pd.DataFrame(record_data, columns=cleaned_columns) + record_df = record_df.replace({'\t': '', '"': ''}, regex=True) + + data = record_df.loc[:, ~record_df.columns.str.contains("Unnamed")] + + data["Time(h:min:s.ms)"] = data["Time(h:min:s.ms)"].apply( + lambda x: 3600 * float(x.split(":")[-3]) + 60 * float(x.split(":")[-2]) + float(x.split(":")[-1])) + + # Deal with missing data in the internal resistance + data["DCIR(O)"] = data["DCIR(O)"].apply( + lambda x: np.nan if x == "-" else x + ) + + columns = { + v: k for k, v in CONVERSION_CONFIG["column_names"].items() if v in data.columns} + data.rename(columns=columns, inplace=True) + + data_types = { + k: v for k, v in CONVERSION_CONFIG["data_types"].items() if k in data.columns} + data = data.astype(data_types) + + scales = {k: v for k, + v in CONVERSION_CONFIG["scales"].items() if k in data.columns} + for column, scale in scales.items(): + data[column] *= scale + + data["internal_resistance"] = data["internal_resistance"].ffill() + data["internal_resistance"] = data["internal_resistance"].bfill() + + data["test_time"] = ( + data["step_time"].diff().fillna(0).apply( + lambda x: 0 if x < 0 else x).cumsum() + ) + + cycles = data_cycles(data) + + metadata_file_path 
= cell_file.with_suffix('.metadata.yaml') + metadata_file = metadata_file_path if os.path.exists( + metadata_file_path) else None + metadata = organize_metadata(metadata_file) + + return organize_cell(cell_file.stem, cycles, metadata) + + +def data_cycles(raw_data): + grouped_by_cycle_idx = raw_data.groupby('cycle_index') + columns_to_group_mapping = { + 'data_point': 'data_point', + 'step_index': 'step_index', + 'current': 'I', + 'voltage': 'V', + 'charge_capacity': 'Qc', + 'discharge_capacity': 'Qd', + 'charge_energy': 'Ec', + 'discharge_energy': 'Ed', + 'temperature': 'T', + 'internal_resistance': 'IR', + 'test_time': 't', + 'date_time': 'date_time_iso', + } + grouped_data = {} + grouped_data['data_point'] = grouped_by_cycle_idx.apply( + lambda x: (x.index + 1 - x.index[0]).tolist() + ) + for column in columns_to_group_mapping.keys(): + if column in raw_data.columns: + try: + grouped_data[column] = grouped_by_cycle_idx[column].apply(list) + except Exception as e: + logging.warning( + f'Failed to process column {column} to group: {e}') + else: + grouped_data[column] = grouped_by_cycle_idx.apply( + lambda x: [None]*len(x)) + + cycle_dict = {} + all_cycles = set(range(max(grouped_by_cycle_idx.groups.keys()) + 1)) + existing_cycles = set(grouped_by_cycle_idx.groups.keys()) + + missing_cycles = all_cycles - existing_cycles + for missing_cycle in missing_cycles: + logging.warning(f"Data of cycle {missing_cycle} missed.") + + for cdi, i in enumerate(grouped_by_cycle_idx.groups.keys()): + cd = {} + try: + cd['data_point'] = grouped_data['data_point'][i] + for field in columns_to_group_mapping.keys(): + if field == 'internal_resistance': + ##################################################################### + # Assume the last IR of each cycle is representative of that cycle. 
# + ##################################################################### + cd['IR'] = grouped_data[field][i][-1] + elif field == 'test_time': + min_date_time = min(grouped_data[field][i]) + cd['t'] = [ + time - min_date_time for time in grouped_data[field][i]] + else: + cd[columns_to_group_mapping[field]] = grouped_data[field][i] + except Exception as e: + logging.warning( + f"Error processing field '{field}' in cycle {i}") + cycle_dict[str(cdi)] = cd + + return cycle_dict + + +# Need adjusting to custom metadata +def organize_metadata(meta_path): + METADATA_CONFIG_FIELDS = ["form_factor", "anode_material", "cathode_material", + "nominal_capacity_in_Ah", + "min_voltage_limit_in_V", "max_voltage_limit_in_V", + "charge_protocol", "discharge_protocol"] + METADATA_CONFIG = {field: None for field in METADATA_CONFIG_FIELDS} + + try: + if meta_path is None or str(meta_path) == "None": + raise ValueError("Metadata config path is not specified.") + config = import_config(Path(meta_path), METADATA_CONFIG_FIELDS) + METADATA_CONFIG.update(config) + except (ValueError, FileNotFoundError) as e: + logging.error(e) + + charge_protocols = [CyclingProtocol( + **cp) for cp in METADATA_CONFIG.get('charge_protocol', []) or []] + discharge_protocols = [CyclingProtocol( + **dp) for dp in METADATA_CONFIG.get('discharge_protocol', []) or []] + + metadata = { + "form_factor": METADATA_CONFIG.get("form_factor"), + "anode_material": METADATA_CONFIG.get("anode_material"), + "cathode_material": METADATA_CONFIG.get("cathode_material"), + "charge_protocol": charge_protocols, + "discharge_protocol": discharge_protocols, + "nominal_capacity_in_Ah": METADATA_CONFIG.get("nominal_capacity_in_Ah"), + "min_voltage_limit_in_V": METADATA_CONFIG.get("min_voltage_limit_in_V"), + "max_voltage_limit_in_V": METADATA_CONFIG.get("max_voltage_limit_in_V") + } + return metadata + + +def organize_cell(name, cycles, metadata): + cycle_data = [] + for cycle_idx, cycle in cycles.items(): + # Skip the first cycle if 
it is necessary + # if int(cycle_idx) == 0: + # continue + cycle_data.append(CycleData( + cycle_number=cycle_idx, + voltage_in_V=cycle['V'], + current_in_A=cycle['I'], + charge_capacity_in_Ah=cycle['Qc'], + discharge_capacity_in_Ah=cycle['Qd'], + time_in_s=cycle['t'], + temperature_in_C=cycle['T'], + internal_resistance_in_ohm=cycle['IR'], + + energy_charge=cycle['Ec'], + energy_discharge=cycle['Ed'], + step_index=cycle['step_index'], + data_point=cycle['data_point'] + )) + + return BatteryData( + cell_id=f'NEWARE_{name}', + cycle_data=cycle_data, + form_factor=metadata["form_factor"], + anode_material=metadata["anode_material"], + cathode_material=metadata["cathode_material"], + charge_protocol=metadata["charge_protocol"], + discharge_protocol=metadata["discharge_protocol"], + nominal_capacity_in_Ah=metadata["nominal_capacity_in_Ah"], + min_voltage_limit_in_V=metadata["min_voltage_limit_in_V"], + max_voltage_limit_in_V=metadata["max_voltage_limit_in_V"] + ) diff --git a/batteryml/utils/config.py b/batteryml/utils/config.py index b4bdb5b..900c2e4 100644 --- a/batteryml/utils/config.py +++ b/batteryml/utils/config.py @@ -6,6 +6,7 @@ import os from addict import Dict + def import_config(path: str, attr: list): """_summary_ @@ -27,6 +28,18 @@ def import_config(path: str, attr: list): if not isinstance(attr, list): attr = [attr] + + # Check if expected attributes are present in the config file + missing_fields = [field for field in attr if field not in config] + if missing_fields: + raise ValueError(f"Missing expected config fields: {missing_fields}") + + for field in attr: + value = config[field] + # Check if the value is not None or empty + if value is None or (isinstance(value, (str, list)) and not value): + raise ValueError(f"Invalid value for config field: {field}") + return { field: getattr(config, field) if hasattr(config, field) else {} for field in attr @@ -36,6 +49,7 @@ def import_config(path: str, attr: list): class YamlHandler: """handle yaml file """ + def 
__init__(self, file_path): """ YamlHandler init Parameters @@ -44,7 +58,7 @@ def __init__(self, file_path): yaml file path of config """ if not os.path.exists(file_path): - return FileExistsError(OSError) + raise FileNotFoundError(f"The file {file_path} does not exist.") self.file_path = file_path def read_yaml(self, encoding='utf-8'): diff --git a/bin/batteryml.py b/bin/batteryml.py index ead542c..2f8ae47 100755 --- a/bin/batteryml.py +++ b/bin/batteryml.py @@ -21,10 +21,10 @@ def main(): # download command download_parser = subparsers.add_parser( - "download", help="Download raw files for public datasets") + "download", help="Download raw files for public datasets") download_parser.add_argument( "dataset", choices=list(DOWNLOAD_LINKS.keys()), - help="Public dataset to download") + help="Public dataset to download") download_parser.add_argument( "output_dir", help="Directory to save the raw data files") download_parser.set_defaults(func=download) @@ -34,11 +34,14 @@ def main(): "preprocess", help="Organize the raw data files into BatteryData and save to disk") preprocess_parser.add_argument( - "input_type", choices=SUPPORTED_SOURCES, + "input_type", choices=[value for values in SUPPORTED_SOURCES.values() for value in values], help="Type of input raw files. For public datasets, specific " "preprocessor will be called. 
For standard battery test " "output files, the corresponding preprocessing logic " "will be applied.") + preprocess_parser.add_argument( + "--config", default="None", + help="Path to the config file of Cycler.") preprocess_parser.add_argument( "raw_dir", help="Directory of raw input files.") preprocess_parser.add_argument( @@ -94,22 +97,24 @@ def download(args): def preprocess(args): - assert os.path.exists(args.raw_dir), f'Input path not exist: {args.raw_dir}' + assert os.path.exists( + args.raw_dir), f'Input path not exist: {args.raw_dir}' if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) + config_path = Path(args.config) input_path, output_path = Path(args.raw_dir), Path(args.output_dir) processor = PREPROCESSORS.build(dict( name=f'{args.input_type}Preprocessor', output_dir=output_path, silent=args.silent )) - processor(input_path) + processor(input_path, config_path=config_path) def run(args): - # Convert skip_if_executed to boolean - args.skip_if_executed = args.skip_if_executed.lower() in ['true', '1', 'yes'] + # Convert skip_if_executed to boolean + args.skip_if_executed = args.skip_if_executed.lower() in ['true', '1', 'yes'] pipeline = Pipeline(args.config, args.workspace) model, dataset = None, None # Reuse to save setup cost if args.train: diff --git a/configs/cyclers/arbin.yaml b/configs/cyclers/arbin.yaml new file mode 100644 index 0000000..4ff15c2 --- /dev/null +++ b/configs/cyclers/arbin.yaml @@ -0,0 +1,37 @@ +column_names: + # columns names of raw_data: + # ['Test_Time', 'DateTime', 'Step_Time', 'Step_Index', 'Cycle_Index', 'Current', 'Voltage', 'Charge_Capacity', 'Discharge_Capacity', 'Charge_Energy', 'Discharge_Energy', 'dV/dt', 'Internal_Resistance', 'Temperature'] + test_time: 'Test_Time' + date_time: 'DateTime' + step_time: 'Step_Time' + step_index: 'Step_Index' + cycle_index: 'Cycle_Index' + current: 'Current' + voltage: 'Voltage' + charge_capacity: 'Charge_Capacity' + discharge_capacity: 'Discharge_Capacity' + charge_energy: 
'Charge_Energy' + discharge_energy: 'Discharge_Energy' + _dv/dt: 'dV/dt' + internal_resistance: 'Internal_Resistance' + temperature: 'Temperature' + + # none: + data_point: 'Data_Point' + +data_types: + data_point: 'int32' + test_time: 'float64' + date_time: 'float32' + step_time: 'float32' + step_index: 'int16' + cycle_index: 'int32' + current: 'float32' + voltage: 'float32' + charge_capacity: 'float64' + discharge_capacity: 'float64' + charge_energy: 'float64' + discharge_energy: 'float64' + dv/dt: 'float32' + internal_resistance: 'float32' + temperature: 'float32' \ No newline at end of file diff --git a/configs/cyclers/arbin_metadata.yaml b/configs/cyclers/arbin_metadata.yaml new file mode 100644 index 0000000..1a63398 --- /dev/null +++ b/configs/cyclers/arbin_metadata.yaml @@ -0,0 +1,42 @@ +form_factor: 1 +anode_material: 2 +cathode_material: 3 +nominal_capacity_in_Ah: None +min_voltage_limit_in_V: None +max_voltage_limit_in_V: None + +charge_protocol: + - rate_in_C: 0.5 + current_in_A: 1.2 + voltage_in_V: 3.7 + power_in_W: null + start_voltage_in_V: null + start_soc: 0.0 + end_voltage_in_V: 4.2 + end_soc: 1.0 + - rate_in_C: 1.0 + current_in_A: 2.0 + voltage_in_V: 3.8 + power_in_W: null + start_voltage_in_V: null + start_soc: 0.0 + end_voltage_in_V: 4.1 + end_soc: 1.0 + +discharge_protocol: + - rate_in_C: 0.5 + current_in_A: 1.0 + voltage_in_V: 3.2 + power_in_W: null + start_voltage_in_V: null + start_soc: 1.0 + end_voltage_in_V: 2.5 + end_soc: 0.0 + - rate_in_C: 0.8 + current_in_A: 1.5 + voltage_in_V: 3.3 + power_in_W: null + start_voltage_in_V: null + start_soc: 1.0 + end_voltage_in_V: 2.7 + end_soc: 0.0 diff --git a/configs/cyclers/neware.yaml b/configs/cyclers/neware.yaml new file mode 100644 index 0000000..ccf76d0 --- /dev/null +++ b/configs/cyclers/neware.yaml @@ -0,0 +1,42 @@ +column_names: + # columns names of raw_data: + # ['Cycle ID', 'Step ID', 'Record ID', 'Time(h:min:s.ms)', 'Voltage(V)', 'Current(mA)', 'Temperature(C)', 'Capacity(mAh)', 'Capacity 
Density(mAh/g)', 'Energy(mWh)', 'CmpEng(mWh/g)', 'Realtime', 'Min-T(C)', 'Max-T(C)', 'Avg-T(C)', 'Power(mW)', 'Capacitance_Chg(mAh)', 'Capacitance_DChg(mAh)', 'Engy_Chg(mWh)', 'Engy_DChg(mWh)', 'dQ/dV(mAh/V)', 'dQm/dV(mAh/V.g)', 'DCIR(O)'] + cycle_index: 'Cycle ID' + step_index: 'Step ID' + voltage: 'Voltage(V)' + current: 'Current(mA)' + discharge_capacity: 'Capacitance_DChg(mAh)' + charge_capacity: 'Capacitance_Chg(mAh)' + charge_energy: 'Engy_Chg(mWh)' + discharge_energy: 'Engy_DChg(mWh)' + date_time: 'Realtime' + data_point: 'Record ID' + step_time: 'Time(h:min:s.ms)' + temperature: 'Temperature(C)' + internal_resistance: 'DCIR(O)' + +data_types: + data_point: 'int32' + test_time: 'float64' + step_time: 'float32' + step_index: 'int16' + cycle_index: 'int32' + current: 'float32' + voltage: 'float32' + discharge_capacity: 'float64' + charge_capacity: 'float64' + charge_energy: 'float64' + discharge_energy: 'float64' + internal_resistance: 'float32' + temperature: 'float32' + +# for the conversion of variable units: +# example: +# time: ms -> s +# time: 0.001 +scales: + current: 0.001 + charge_capacity: 0.001 + discharge_capacity: 0.001 + charge_energy: 0.001 + discharge_energy: 0.001 From c2131f8f2c1eca8af1500188fe34aa985e33818c Mon Sep 17 00:00:00 2001 From: GCR-1178 Date: Wed, 3 Jul 2024 09:35:44 +0000 Subject: [PATCH 2/2] update dependencies --- batteryml/preprocess/download.py | 6 +++--- requirements.txt | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/batteryml/preprocess/download.py b/batteryml/preprocess/download.py index 709c4fc..65db334 100644 --- a/batteryml/preprocess/download.py +++ b/batteryml/preprocess/download.py @@ -69,7 +69,7 @@ def memory2str(mem): print(f'[INFO] {filename} already exists. 
Skip it.') return with open(filename, 'wb') as f: - response = requests.get(url, stream=True) + response = requests.get(url, stream=True, verify=False) if total_length is None: total_length = response.headers.get('content-length') if response.status_code != 200: @@ -82,8 +82,8 @@ def memory2str(mem): total_size = memory2str(total_length) bar_format = ( f'Downloading {filename}' - '|{percentage:3.0f}%|{bar:20}|{desc}' - '[{elapsed}<{remaining}{postfix}]') + '|{percentage:3.0f}%|{bar:20}|{desc}' + '[{elapsed}<{remaining}{postfix}]') if update_interval * chunk_size * 100 >= total_length: update_interval = 1 with tqdm(total=total_length, bar_format=bar_format) as bar: diff --git a/requirements.txt b/requirements.txt index 865be1c..9172e8c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,5 @@ numba matplotlib h5py openpyxl +requests +xgboost \ No newline at end of file