generated from amosproj/amos202Xss0Y-projname
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #39 from amosproj/feature/evp-skeleton
Implemented the EVP skeleton class. Issue #22
- Loading branch information
Showing
19 changed files
with
1,187 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,7 @@ pids | |
# Python | ||
*.pyc | ||
__pycache__/ | ||
Pipfile.lock | ||
# Pipfile.lock | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]> | ||
|
||
from .database_dummy import DatabaseDummy | ||
|
||
_database = None | ||
|
||
|
||
def get_database() -> DatabaseDummy:
    """Return the shared ``DatabaseDummy``, creating it on the first call.

    The instance is cached in the module-level ``_database`` variable, so
    every later call hands back the same object.
    """
    global _database
    if _database is not None:
        return _database
    _database = DatabaseDummy()
    return _database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]> | ||
|
||
import json | ||
from typing import List | ||
|
||
from database.models import Lead | ||
from database.parsers import LeadParser | ||
|
||
|
||
class DatabaseDummy:
    """In-memory stand-in for the lead database, backed by a JSON file.

    The file must contain an object with a ``"training_leads"`` list; every
    entry of that list is kept as a raw dict under its ``"lead_id"``.
    """

    def __init__(self, path: str = "src/database/dummy_leads.json") -> None:
        """Load all leads from the JSON file at *path*.

        Args:
            path: Location of the JSON file. The default is a relative path,
                so it is resolved against the current working directory.

        Raises:
            OSError: If the file cannot be opened.
            KeyError: If the file has no ``"training_leads"`` entry or an
                entry has no ``"lead_id"``.
        """
        # Read as UTF-8 explicitly instead of relying on the locale default.
        with open(path, encoding="utf-8") as f:
            json_data = json.load(f)["training_leads"]
        self.data = {d["lead_id"]: d for d in json_data}

    def get_lead_by_id(self, id_: int) -> Lead:
        """Return the lead stored under *id_*, parsed into a ``Lead``.

        Raises:
            KeyError: If no lead with that id was loaded.
        """
        return LeadParser.parse_lead_from_dict(self.data[id_])

    def get_all_leads(self) -> List[Lead]:
        """Return every loaded lead, parsed into ``Lead`` objects."""
        return [LeadParser.parse_lead_from_dict(entry) for entry in self.data.values()]

    def update_lead(self, lead: Lead):
        """Report the update on stdout; the dummy does not store the change."""
        print(f"Updating database entry for lead#{lead.lead_id}")
        print(f"Update values: {lead}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
{ | ||
"training_leads": [ | ||
{ | ||
"lead_id": 0, | ||
"annual_income": 25000, | ||
"product_of_interest": "Terminals", | ||
"first_name": "Anton", | ||
"last_name": "Kerner", | ||
"phone_number": "49176123123", | ||
"email_address": "[email protected]", | ||
"customer_probability": 0.1, | ||
"life_time_value": 400000 | ||
}, | ||
{ | ||
"lead_id": 1, | ||
"annual_income": 70000, | ||
"product_of_interest": "Terminals", | ||
"first_name": "Anton", | ||
"last_name": "Kerner", | ||
"phone_number": "49176123123", | ||
"email_address": "[email protected]", | ||
"customer_probability": 0.4, | ||
"life_time_value": 40000 | ||
}, | ||
{ | ||
"lead_id": 2, | ||
"annual_income": 15000, | ||
"product_of_interest": "Terminals", | ||
"first_name": "Anton", | ||
"last_name": "Kerner", | ||
"phone_number": "49176123123", | ||
"email_address": "[email protected]", | ||
"customer_probability": 0.8, | ||
"life_time_value": 40000 | ||
}, | ||
{ | ||
"lead_id": 3, | ||
"annual_income": 2500000, | ||
"product_of_interest": "Terminals", | ||
"first_name": "Anton", | ||
"last_name": "Kerner", | ||
"phone_number": "49176123123", | ||
"email_address": "[email protected]", | ||
"customer_probability": 0.08, | ||
"life_time_value": 400000 | ||
}, | ||
{ | ||
"lead_id": 4, | ||
"annual_income": 1200, | ||
"product_of_interest": "Terminals", | ||
"first_name": "Anton", | ||
"last_name": "Kerner", | ||
"phone_number": "49176123123", | ||
"email_address": "[email protected]", | ||
"customer_probability": 0.9, | ||
"life_time_value": 3400.23 | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
SPDX-License-Identifier: CC-BY-4.0 | ||
SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]> | ||
|
||
from enum import Enum, IntEnum | ||
from typing import List, Optional | ||
|
||
from pydantic import BaseModel, EmailStr, Field | ||
|
||
|
||
class AnnualIncome(IntEnum):
    """Annual income brackets in euros.

    Each member's value is the smallest whole-euro amount that falls into
    its bracket; the bracket itself is given next to the member.
    """

    Nothing = 0  # exactly 0€
    Class1 = 1  # (0€, 35000€]
    Class2 = 35_001  # (35000€, 60000€]
    Class3 = 60_001  # (60000€, 100000€]
    Class4 = 100_001  # (100000€, 200000€]
    Class5 = 200_001  # (200000€, 400000€]
    Class6 = 400_001  # (400000€, 600000€]
    Class7 = 600_001  # (600000€, 1000000€]
    Class8 = 1_000_001  # (1000000€, 2000000€]
    Class9 = 2_000_001  # (2000000€, 5000000€]
    Class10 = 5_000_001  # above 5000000€
|
||
|
||
class ProductOfInterest(str, Enum):
    """Products a lead can be interested in.

    Members are also ``str`` instances, so they compare equal to their
    display text and can be looked up from it.
    """

    Nothing = "Nothing"
    Terminals = "Terminals"
    CashRegisterSystem = "Cash Register System"
    BusinessAccount = "Business Account"
    All = "All"
    Other = "Other"
|
||
|
||
class LeadValue(BaseModel):
    """Estimated worth of a lead.

    ``life_time_value`` must be non-negative and ``customer_probability``
    must lie in the closed range [0, 1].
    """

    life_time_value: float = Field(..., ge=0)
    customer_probability: float = Field(..., ge=0, le=1)

    def get_lead_value(self) -> float:
        """Return the life time value weighted by the customer probability."""
        weighted_value = self.life_time_value * self.customer_probability
        return weighted_value
|
||
|
||
class Lead(BaseModel):
    """A sales lead with contact data and an optional value estimate."""

    lead_id: int  # could be expanded to a UUID later
    first_name: str
    last_name: str
    email_address: EmailStr
    phone_number: str
    annual_income: AnnualIncome
    product_of_interest: ProductOfInterest
    # Explicit default: without it pydantic v2 treats an Optional field as
    # required, while v1 already defaulted it to None.
    lead_value: Optional[LeadValue] = None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]> | ||
|
||
from typing import Dict | ||
|
||
from database.models import AnnualIncome, Lead, LeadValue, ProductOfInterest | ||
|
||
|
||
class LeadParser:
    """Builds ``Lead`` models from plain dictionaries."""

    @staticmethod
    def _classify_annual_income(amount) -> AnnualIncome:
        """Return the income class whose documented interval contains *amount*."""
        annual_income = AnnualIncome.Nothing
        # Relies on the members being declared in ascending order.
        for income_class in AnnualIncome:
            # A member's value is one above its exclusive lower bound, so
            # comparing against ``value - 1`` also sorts fractional amounts
            # (e.g. 35000.5) into the interval documented on AnnualIncome.
            if amount <= income_class - 1:
                break
            annual_income = income_class
        return annual_income

    @staticmethod
    def parse_lead_from_dict(data: Dict) -> Lead:
        """Convert a raw lead dictionary into a ``Lead``.

        Args:
            data: Must contain ``lead_id``, ``first_name``, ``last_name``,
                ``email_address``, ``phone_number``, ``annual_income`` and
                ``product_of_interest``. ``customer_probability`` and
                ``life_time_value`` are optional.

        Returns:
            The parsed lead. Its ``lead_value`` is set only when both
            optional keys are present and not ``None``.

        Raises:
            KeyError: If a mandatory key is missing.
            ValueError: If ``product_of_interest`` is not a known product.
        """
        customer_probability = data.get("customer_probability")
        life_time_value = data.get("life_time_value")

        lead_value = None
        if customer_probability is not None and life_time_value is not None:
            lead_value = LeadValue(
                life_time_value=life_time_value,
                customer_probability=customer_probability,
            )

        return Lead(
            lead_id=data["lead_id"],
            first_name=data["first_name"],
            last_name=data["last_name"],
            email_address=data["email_address"],
            phone_number=data["phone_number"],
            annual_income=LeadParser._classify_annual_income(data["annual_income"]),
            product_of_interest=ProductOfInterest(data["product_of_interest"]),
            lead_value=lead_value,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]> | ||
|
||
import numpy as np | ||
from sklearn.linear_model import LinearRegression | ||
|
||
from database import get_database | ||
from database.models import LeadValue | ||
|
||
|
||
class EstimatedValuePredictor:
    """Dummy predictor for a lead's customer probability and life time value.

    Two linear regressions are fitted on one-hot encoded leads taken from
    the database: one for the probability, one for the life time value.
    """

    def __init__(self) -> None:
        """Fit both regressions on every lead currently in the database.

        Every lead is expected to carry a ``lead_value``; a lead without one
        makes the attribute access below fail.
        """
        self.probability_predictor = LinearRegression()
        self.life_time_value_predictor = LinearRegression()

        all_leads = get_database().get_all_leads()
        # Remember the one-hot width so prediction uses the same number of
        # features the models were trained with.
        self._n_features = len(all_leads)
        X = np.identity(self._n_features)
        y_probability = np.array(
            [lead.lead_value.customer_probability for lead in all_leads]
        )
        y_value = np.array([lead.lead_value.life_time_value for lead in all_leads])

        self.probability_predictor.fit(X, y_probability)
        self.life_time_value_predictor.fit(X, y_value)

    def estimate_value(self, lead_id) -> LeadValue:
        """Predict the value of a lead and hand the result to the database.

        Args:
            lead_id: Id of the lead to look up in the database.

        Returns:
            The predicted ``LeadValue``; it is also set on the lead that is
            passed to ``update_lead``.

        Raises:
            IndexError: If the lead's id is not a valid one-hot position.
        """
        # make call to database to retrieve relevant fields for this lead
        lead = get_database().get_lead_by_id(lead_id)

        # preprocess lead data to get the feature vector for our ML model;
        # the lead id doubles as the one-hot position
        feature_vector = np.zeros((1, self._n_features))
        feature_vector[0, lead.lead_id] = 1.0

        # predict() returns one entry per input row; take the single entry
        # so the model fields receive plain floats instead of arrays
        life_time_value_pred = float(
            self.life_time_value_predictor.predict(feature_vector)[0]
        )
        raw_probability = float(self.probability_predictor.predict(feature_vector)[0])
        # manually applying sigmoid to ensure value in range 0, 1
        cust_prob_pred = float(1 / (1 + np.exp(-raw_probability)))

        lead.lead_value = LeadValue(
            life_time_value=life_time_value_pred,
            customer_probability=cust_prob_pred,
        )
        get_database().update_lead(lead)

        # might not need to return here if the database is updated by this function
        return lead.lead_value
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]> | ||
|
||
from database import get_database | ||
from evp.evp import EstimatedValuePredictor | ||
|
||
# Demo: look up one lead, run the dummy predictor on it and print the result.
lead_id = 1

lead = get_database().get_lead_by_id(lead_id)

evp = EstimatedValuePredictor()
lead_value = evp.estimate_value(lead_id)

_report = f"""
Dummy prediction for lead#{lead.lead_id}:
Lead:
{lead}
This lead has a predicted probability of {lead_value.customer_probability:.2f} to become a customer.
This lead has a predicted life time value of {lead_value.life_time_value:.2f}.
This results in a total lead value of {lead_value.get_lead_value():.2f}.
"""
print(_report)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]> | ||
|
||
import json | ||
from typing import Dict | ||
|
||
import pytest | ||
|
||
|
||
@pytest.fixture
def create_lead_dict(request) -> Dict:
    """Yield a default lead dictionary with the test's overrides applied.

    ``request.param`` must be a mapping; its entries replace or extend the
    default values below.
    """
    overrides = request.param
    lead_data = {
        "lead_id": 0,
        "annual_income": 0,
        "product_of_interest": "Nothing",
        "first_name": "Manu",
        "last_name": "Musterperson",
        "phone_number": "49123123123",
        "email_address": "[email protected]",
    }
    lead_data.update(overrides.items())
    yield lead_data
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# SPDX-License-Identifier: MIT | ||
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]> | ||
|
||
import os | ||
import sys | ||
|
||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src"))) | ||
|
||
from database import get_database | ||
from database.models import LeadValue | ||
from evp.evp import EstimatedValuePredictor | ||
|
||
|
||
def test_estimate_value():
    """The predictor returns a ``LeadValue`` for every lead in the database."""
    leads = get_database().get_all_leads()
    evp = EstimatedValuePredictor()
    for lead in leads:
        value = evp.estimate_value(lead.lead_id)
        # isinstance is the idiomatic type check and also accepts subclasses
        assert isinstance(value, LeadValue)
Oops, something went wrong.