Commit ced0900: Start updating examples to use comet_ml.start
Parent: 9c88d5d
Showing 99 changed files with 1,038 additions and 171,390 deletions.
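The recurring change across these diffs is that the examples no longer construct `comet_ml.Experiment(...)` directly; they call `comet_ml.start(...)` and let helper functions pick up the active experiment. A minimal sketch of the pattern, assuming the same `COMET_WORKSPACE` and `COMET_PROJECT_NAME` environment variables used by the scripts below; the logging call at the end is illustrative only:

import os

import comet_ml

# Before this commit the examples created the experiment object directly:
#   experiment = comet_ml.Experiment(workspace=..., project_name=...)
# After it, the experiment is created through comet_ml.start:
experiment = comet_ml.start(
    workspace=os.environ["COMET_WORKSPACE"],
    project_name=os.environ["COMET_PROJECT_NAME"],
)

# Helper functions can then fetch the active experiment instead of
# receiving it as an argument, as the example scripts below do:
exp = comet_ml.get_running_experiment()
exp.log_other("pipeline_step", "example")  # illustrative logging call only

experiment.end()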
@@ -1,35 +1,30 @@
-import comet_ml
-import sys
+# -*- coding: utf-8 -*-
+from comet_ml import Optimizer
 
-from comet_ml import Experiment, Optimizer
 
 def run():
-
     opt_config = {
-
         # We pick the Bayes algorithm:
         "algorithm": "grid",
-
         # Declare your hyperparameters in the Vizier-inspired format:
         "parameters": {
             "x": {"type": "integer", "min": 1, "max": 5},
         },
-
         # Declare what we will be optimizing, and how:
         "spec": {
             "metric": "loss",
             "objective": "minimize",
-        }
+        },
     }
 
     # initialize the optimizer object
-    opt = Optimizer(
-        config=opt_config
-    )
+    opt = Optimizer(config=opt_config)
 
     # print Optimizer id
     optimizer_id = opt.get_id()
     print(optimizer_id)
 
-if __name__ == '__main__':
-    run()
+
+if __name__ == "__main__":
+    run()
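The updated example only creates the Optimizer and prints its id. For context, a typical way to consume such a grid config is to loop over the experiments the Optimizer generates; the continuation below is not part of this commit, and the objective function is a toy stand-in:

# Hypothetical continuation of run(): iterate over the parameter grid and
# report the metric named in the spec for each generated experiment.
for experiment in opt.get_experiments():
    x = experiment.get_parameter("x")
    loss = (x - 3) ** 2  # toy objective, purely illustrative
    experiment.log_metric("loss", loss)
    experiment.end()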
guides/MPM/end_to_end_example/data_processing/data_processing.py (76 changes: 50 additions & 26 deletions)
@@ -1,68 +1,92 @@
+# -*- coding: utf-8 -*-
+import os
+from io import StringIO
+
 import comet_ml
 
 import pandas as pd
-from io import StringIO
-import os
 
 
 def get_raw_data(workspace_name: str, artifact_name: str):
     """
-    In this function, we will check if the raw data exists in Comet Artifacts. If it does, we will download it from there,
-    if not we will upload it from the local directory.
+    In this function, we will check if the raw data exists in Comet Artifacts. If it
+    does, we will download it from there, if not we will upload it from the local
+    directory.
-    Once the file is available locally, we will load it into a pandas dataframe and return it.
+    Once the file is available locally, we will load it into a pandas dataframe and
+    return it.
     """
     exp = comet_ml.get_running_experiment()
 
     try:
         artifact = exp.get_artifact(artifact_name=f"{artifact_name}_raw")
 
         # Download the artifact
         artifact.download(path="./")
     except Exception as e:
         print(f"Error downloading artifact: {e}")
-        artifact = comet_ml.Artifact(name=f"{artifact_name}_raw", artifact_type="dataset")
+        artifact = comet_ml.Artifact(
+            name=f"{artifact_name}_raw", artifact_type="dataset"
+        )
         artifact.add("./credit_scoring_dataset.csv")
         exp.log_artifact(artifact)
 
     df = pd.read_csv("./credit_scoring_dataset.csv")
     return df
 
 
 def preprocess_data(df: pd.DataFrame):
     """
-    In this function, we will preprocess the data to make it ready for the model. We will store the preprocessed data in a
-    new Comet Artifact.
+    In this function, we will preprocess the data to make it ready for the model. We
+    will store the preprocessed data in a new Comet Artifact.
     """
     # Select the relevant columns
-    df = df.loc[:, ['CustAge', 'CustIncome', 'EmpStatus', 'UtilRate', 'OtherCC', 'ResStatus', 'TmAtAddress', 'TmWBank',
-                    'probdefault']]
-
+    df = df.loc[
+        :,
+        [
+            "CustAge",
+            "CustIncome",
+            "EmpStatus",
+            "UtilRate",
+            "OtherCC",
+            "ResStatus",
+            "TmAtAddress",
+            "TmWBank",
+            "probdefault",
+        ],
+    ]
 
     # Rename the target column
-    df.rename({'probdefault': 'probability_default'}, inplace=True, axis=1)
+    df.rename({"probdefault": "probability_default"}, inplace=True, axis=1)
 
     # Convert the categorical columns to category type
-    for c in ['EmpStatus', 'OtherCC', 'ResStatus']:
-        df[c] = df[c].astype('category')
+    for c in ["EmpStatus", "OtherCC", "ResStatus"]:
+        df[c] = df[c].astype("category")
 
     # Save the preprocessed data to a new Comet Artifact
     csv_buffer = StringIO()
     df.to_csv(csv_buffer, index=False)
     csv_buffer.seek(0)
 
-    artifact = comet_ml.Artifact(name=f"{artifact_name}_preprocessed", artifact_type="dataset")
-
+    artifact = comet_ml.Artifact(
+        name=f"{artifact_name}_preprocessed", artifact_type="dataset"
+    )
     artifact.add(local_path_or_data=csv_buffer, logical_path="preprocessed_data.csv")
 
     exp = comet_ml.get_running_experiment()
     exp.log_artifact(artifact)
 
     return df
 
 
-
 if __name__ == "__main__":
     workspace_name = os.environ["COMET_WORKSPACE"]
     project_name = os.environ["COMET_PROJECT_NAME"]
-    artifact_name = os.environ["COMET_PROJECT_NAME"]
+    artifact_name = os.environ["COMET_ARTIFACT_NAME"]
 
-    exp = comet_ml.Experiment(workspace=workspace_name, project_name=project_name)
+    exp = comet_ml.start(workspace=workspace_name, project_name=project_name)
     df = get_raw_data(workspace_name, artifact_name)
 
     processed_df = preprocess_data(df)
 
     print("Data preprocessing complete.")
@@ -1,68 +1,65 @@
+# -*- coding: utf-8 -*-
+import os
+
 import comet_ml
 
-import os
-import pandas as pd
-import numpy as np
 import lightgbm as lgb
+import numpy as np
+import pandas as pd
 
 
 def get_training_data(artifact_name: str) -> pd.DataFrame:
     exp = comet_ml.get_running_experiment()
 
     artifact = exp.get_artifact(artifact_name)
     artifact.download(path="./")
 
     df = pd.read_csv("preprocessed_data.csv")
-    for c in ['EmpStatus', 'OtherCC', 'ResStatus']:
-        df[c] = df[c].astype('category')
+    for c in ["EmpStatus", "OtherCC", "ResStatus"]:
+        df[c] = df[c].astype("category")
 
     return df
 
 
 def train_model(training_data: pd.DataFrame, model_name: str) -> lgb.Booster:
     exp = comet_ml.get_running_experiment()
 
     # Create training dataset
-    X_train = training_data.drop('probability_default', axis=1)
-    y_train = (training_data['probability_default'] >= 0.5)
+    X_train = training_data.drop("probability_default", axis=1)
+    y_train = training_data["probability_default"] >= 0.5
 
-    training_dataset = lgb.Dataset(data = X_train,
-                                   label = y_train)
+    training_dataset = lgb.Dataset(data=X_train, label=y_train)
 
     # Train model
     params = {
-        'num_iterations': 30,
-        'max_depth': 2,
-        'objective': 'binary',
-        'metric': ['auc', 'average_precision', 'l1', 'l2']
+        "num_iterations": 30,
+        "max_depth": 2,
+        "objective": "binary",
+        "metric": ["auc", "average_precision", "l1", "l2"],
     }
-    model = lgb.train(params = params,
-                      train_set = training_dataset,
-                      valid_sets = training_dataset)
+    model = lgb.train(
+        params=params, train_set=training_dataset, valid_sets=training_dataset
+    )
 
     # Evaluate model
     y_pred = np.where(model.predict(X_train) > 0.5, 1, 0)
-    experiment.log_confusion_matrix(
-        y_true=y_train,
-        y_predicted=y_pred
-    )
+    experiment.log_confusion_matrix(y_true=y_train, y_predicted=y_pred)
 
     # Save model and log to Comet
-    model.save_model('./model.txt')
-    experiment.log_model(model_name, './model.txt')
-    os.remove('./model.txt')
+    model.save_model("./model.txt")
+    experiment.log_model(model_name, "./model.txt")
+    os.remove("./model.txt")
 
     return model
 
 
-if __name__ == '__main__':
-
+if __name__ == "__main__":
     ARTIFACT_NAME = os.environ["COMET_PROJECT_NAME"]
     WORKSPACE = os.environ["COMET_WORKSPACE"]
     MODEL_REGISTRY_NAME = os.environ["COMET_MODEL_REGISTRY_NAME"]
 
     # Model training script
-    experiment = comet_ml.Experiment()
+    experiment = comet_ml.start()
 
-    training_data = get_training_data(artifact_name = f"{ARTIFACT_NAME}_preprocessed")
-    model = train_model(training_data, model_name = MODEL_REGISTRY_NAME)
-
+    training_data = get_training_data(artifact_name=f"{ARTIFACT_NAME}_preprocessed")
+    model = train_model(training_data, model_name=MODEL_REGISTRY_NAME)
 
     experiment.register_model(MODEL_REGISTRY_NAME)
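After `register_model`, a downstream serving or monitoring script would typically pull the model back out of the Comet model registry. The snippet below is not part of this commit; it is a minimal sketch, assuming the `download_registry_model` helper of the comet_ml Python API, an already registered version string, and the output path shown:

import os

import lightgbm as lgb
from comet_ml.api import API

# Fetch the registered model files from the Comet model registry.
api = API()
api.download_registry_model(
    workspace=os.environ["COMET_WORKSPACE"],
    registry_name=os.environ["COMET_MODEL_REGISTRY_NAME"],
    version="1.0.0",  # assumed version string
    output_path="./registry_model",
)

# The training script saved the booster with model.save_model("./model.txt"),
# so the downloaded file can be loaded the same way.
model = lgb.Booster(model_file="./registry_model/model.txt")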