Skip to content

Commit

Permalink
Merge pull request #39 from amosproj/feature/evp-skeleton
Browse files Browse the repository at this point in the history
Implemented the EVP skeleton class. Issue #22
  • Loading branch information
felix-zailskas authored Nov 3, 2023
2 parents ef262bf + 2b34b04 commit 8107616
Show file tree
Hide file tree
Showing 19 changed files with 1,187 additions and 8 deletions.
10 changes: 4 additions & 6 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,24 +46,22 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest pipenv
pip install pipenv
# if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
pipenv install
pipenv install --dev
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
pipenv run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
pipenv run flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pipenv run pytest
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pids
# Python
*.pyc
__pycache__/
Pipfile.lock
# Pipfile.lock

# Jupyter Notebook
.ipynb_checkpoints
Expand Down
156 changes: 156 additions & 0 deletions LICENSES/CC-BY-4.0.txt

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@ verify_ssl = true
name = "pypi"

[dev-packages]
pytest = "==7.4.3"
pytest = "*"
pre-commit = "*"
flake8 = "*"

[packages]
numpy = "==1.26.1"
scikit-learn = "==1.3.2"
pydantic = "==2.4.2"
email-validator = "==2.1.0"

[requires]
python_version = "3.10"
521 changes: 521 additions & 0 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions Pipfile.lock.license

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions src/database/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

from .database_dummy import DatabaseDummy

_database = None


def get_database() -> DatabaseDummy:
    """Return the shared ``DatabaseDummy``, creating it on the first call.

    The instance is cached in the module-level ``_database`` variable, so all
    later calls hand back the same object.
    """
    global _database
    if _database is not None:
        return _database
    _database = DatabaseDummy()
    return _database
25 changes: 25 additions & 0 deletions src/database/database_dummy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

import json
from pathlib import Path
from typing import List

from database.models import Lead
from database.parsers import LeadParser


class DatabaseDummy:
    """In-memory stand-in for the lead database, backed by ``dummy_leads.json``."""

    def __init__(self) -> None:
        """Load the ``training_leads`` entries and index them by ``lead_id``."""
        # Resolve the data file next to this module rather than relative to the
        # current working directory, so loading works no matter where the
        # process was started from.
        data_path = Path(__file__).with_name("dummy_leads.json")
        with open(data_path, encoding="utf-8") as f:
            json_data = json.load(f)["training_leads"]
        self.data = {d["lead_id"]: d for d in json_data}

    def get_lead_by_id(self, id_: int) -> Lead:
        """Return the entry stored under ``id_`` parsed into a ``Lead``.

        Raises:
            KeyError: If no entry with that id was loaded.
        """
        return LeadParser.parse_lead_from_dict(self.data[id_])

    def get_all_leads(self) -> List[Lead]:
        """Return every loaded entry parsed into a ``Lead``."""
        return [LeadParser.parse_lead_from_dict(entry) for entry in self.data.values()]

    def update_lead(self, lead: Lead):
        """Print the update that was requested; the loaded data is not modified."""
        print(f"Updating database entry for lead#{lead.lead_id}")
        print(f"Update values: {lead}")
59 changes: 59 additions & 0 deletions src/database/dummy_leads.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"training_leads": [
{
"lead_id": 0,
"annual_income": 25000,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.1,
"life_time_value": 400000
},
{
"lead_id": 1,
"annual_income": 70000,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.4,
"life_time_value": 40000
},
{
"lead_id": 2,
"annual_income": 15000,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.8,
"life_time_value": 40000
},
{
"lead_id": 3,
"annual_income": 2500000,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.08,
"life_time_value": 400000
},
{
"lead_id": 4,
"annual_income": 1200,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.9,
"life_time_value": 3400.23
}
]
}
2 changes: 2 additions & 0 deletions src/database/dummy_leads.json.license
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SPDX-License-Identifier: CC-BY-4.0
SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
49 changes: 49 additions & 0 deletions src/database/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

from enum import Enum, IntEnum
from typing import List, Optional

from pydantic import BaseModel, EmailStr, Field


class AnnualIncome(IntEnum):
    """Annual income brackets.

    Each member's value is the smallest whole-euro amount that falls into its
    bracket; the trailing comments give the full range. Members are declared
    in ascending order, which code iterating over the enum relies on.
    """

    Nothing = 0  # 0€
    Class1 = 1  # (0€, 35000€]
    Class2 = 35001  # (35000€, 60000€]
    Class3 = 60001  # (60000€, 100000€]
    Class4 = 100001  # (100000€, 200000€]
    Class5 = 200001  # (200000€, 400000€]
    Class6 = 400001  # (400000€, 600000€]
    Class7 = 600001  # (600000€, 1000000€]
    Class8 = 1000001  # (1000000€, 2000000€]
    Class9 = 2000001  # (2000000€, 5000000€]
    Class10 = 5000001  # (5000000€, inf)


class ProductOfInterest(str, Enum):
    """String-valued enum of the products a lead can be interested in.

    Members can be looked up by their string value, e.g.
    ``ProductOfInterest("Cash Register System")``.
    """

    Nothing = "Nothing"
    Terminals = "Terminals"
    CashRegisterSystem = "Cash Register System"
    BusinessAccount = "Business Account"
    All = "All"
    Other = "Other"


class LeadValue(BaseModel):
    """Value estimate for a lead.

    ``life_time_value`` must be non-negative and ``customer_probability`` must
    lie in the closed interval [0, 1]; both are required.
    """

    life_time_value: float = Field(..., ge=0)
    customer_probability: float = Field(..., ge=0, le=1)

    def get_lead_value(self) -> float:
        """Return the life time value weighted by the customer probability."""
        weighted_value = self.life_time_value * self.customer_probability
        return weighted_value


class Lead(BaseModel):
    """A lead record: contact data, income bracket, product of interest and an
    optional value estimate.
    """

    lead_id: int  # could be expanded to a UUID later
    first_name: str
    last_name: str
    email_address: EmailStr
    phone_number: str
    annual_income: AnnualIncome
    product_of_interest: ProductOfInterest
    # Under pydantic v2 an Optional annotation alone still makes the field
    # required; the explicit default lets a lead be built without an estimate.
    lead_value: Optional[LeadValue] = None
44 changes: 44 additions & 0 deletions src/database/parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

from typing import Dict

from database.models import AnnualIncome, Lead, LeadValue, ProductOfInterest


class LeadParser:
    """Builds ``Lead`` models from plain dictionaries."""

    @staticmethod
    def parse_lead_from_dict(data: Dict) -> Lead:
        """Convert a raw lead dictionary into a ``Lead``.

        ``customer_probability`` and ``life_time_value`` are optional keys; a
        ``LeadValue`` is attached only when both are present and not ``None``.
        The numeric ``annual_income`` is mapped onto the ``AnnualIncome``
        bracket with the largest threshold that does not exceed it.

        Raises:
            KeyError: If one of the mandatory keys is missing from ``data``.
        """
        probability = data.get("customer_probability")
        value = data.get("life_time_value")

        lead_value = None
        if probability is not None and value is not None:
            lead_value = LeadValue(
                life_time_value=value,
                customer_probability=probability,
            )

        # Walk the brackets in ascending order and stop at the first threshold
        # above the income; the bracket seen just before that one is the match.
        raw_income = data["annual_income"]
        bracket = AnnualIncome.Nothing
        for candidate in AnnualIncome:
            if raw_income < candidate:
                break
            bracket = candidate

        product = ProductOfInterest(data["product_of_interest"])
        return Lead(
            lead_id=data["lead_id"],
            first_name=data["first_name"],
            last_name=data["last_name"],
            email_address=data["email_address"],
            phone_number=data["phone_number"],
            annual_income=bracket,
            product_of_interest=product,
            lead_value=lead_value,
        )
2 changes: 2 additions & 0 deletions src/evp/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
47 changes: 47 additions & 0 deletions src/evp/evp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

import numpy as np
from sklearn.linear_model import LinearRegression

from database import get_database
from database.models import LeadValue


class EstimatedValuePredictor:
    """Skeleton predictor for a lead's customer probability and life time value.

    Two ``LinearRegression`` models are fitted on one-hot encodings of the
    leads returned by the database, one column per lead.
    """

    def __init__(self) -> None:
        """Fit both regressors on all leads currently in the database.

        Every lead must carry a ``lead_value``; a lead without one raises
        ``AttributeError`` while the targets are collected.
        """
        self.probability_predictor = LinearRegression()
        self.life_time_value_predictor = LinearRegression()

        all_leads = get_database().get_all_leads()
        # Remember the feature width and which one-hot column belongs to which
        # lead, so prediction does not depend on a fixed lead count or on ids
        # being exactly 0..n-1.
        self._n_features = len(all_leads)
        self._feature_index = {
            lead.lead_id: column for column, lead in enumerate(all_leads)
        }

        X = np.identity(self._n_features)
        y_probability = np.array(
            [lead.lead_value.customer_probability for lead in all_leads]
        )
        y_value = np.array([lead.lead_value.life_time_value for lead in all_leads])

        self.probability_predictor.fit(X, y_probability)
        self.life_time_value_predictor.fit(X, y_value)

    def estimate_value(self, lead_id) -> LeadValue:
        """Predict the value of the lead with ``lead_id`` and store it.

        The lead is fetched from the database, its ``lead_value`` is replaced
        by the prediction and the lead is passed to the database's
        ``update_lead``.

        Returns:
            The newly predicted ``LeadValue``.

        Raises:
            KeyError: If the lead is unknown to the database or was not part
                of the data the models were fitted on.
        """
        # make call to data base to retrieve relevant fields for this lead
        lead = get_database().get_lead_by_id(lead_id)

        # preprocess lead_data to get feature vector for our ML model
        feature_vector = np.zeros((1, self._n_features))
        feature_vector[0, self._feature_index[lead.lead_id]] = 1.0

        # use the models to predict required values
        lead_value_pred = self.life_time_value_predictor.predict(feature_vector)
        # manually applying sigmoid to ensure value in range 0, 1
        cust_prob_pred = 1 / (
            1 + np.exp(-self.probability_predictor.predict(feature_vector))
        )

        # predict() returns one value per input row; unwrap the single
        # prediction into a plain float instead of relying on the implicit
        # (deprecated) conversion of a 1-element array.
        lead.lead_value = LeadValue(
            life_time_value=float(lead_value_pred[0]),
            customer_probability=float(cust_prob_pred[0]),
        )
        get_database().update_lead(lead)

        # might not need to return here if the database is updated by this function
        return lead.lead_value
26 changes: 26 additions & 0 deletions src/evp_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

from database import get_database
from evp.evp import EstimatedValuePredictor

# Demo: predict the value of a single lead and print a summary.
lead_id = 1

# Fetch the lead before predicting so the summary can show its stored data.
lead = get_database().get_lead_by_id(lead_id)

# Constructing the predictor fits its models on the leads in the database.
evp = EstimatedValuePredictor()
lead_value = evp.estimate_value(lead_id)

print(
f"""
Dummy prediction for lead#{lead.lead_id}:
Lead:
{lead}
This lead has a predicted probability of {lead_value.customer_probability:.2f} to become a customer.
This lead has a predicted life time value of {lead_value.life_time_value:.2f}.
This results in a total lead value of {lead_value.get_lead_value():.2f}.
"""
)
24 changes: 24 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

import json
from typing import Dict

import pytest


@pytest.fixture
def create_lead_dict(request) -> Dict:
    """Yield a baseline lead dictionary with the ``request.param`` overrides applied.

    ``request.param`` supplies key/value pairs that replace or extend the
    baseline entries.
    """
    overrides = request.param
    lead_data = dict(
        lead_id=0,
        annual_income=0,
        product_of_interest="Nothing",
        first_name="Manu",
        last_name="Musterperson",
        phone_number="49123123123",
        email_address="[email protected]",
    )
    lead_data.update(overrides.items())
    yield lead_data
File renamed without changes.
19 changes: 19 additions & 0 deletions tests/test_evp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

import os
import sys

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))

from database import get_database
from database.models import LeadValue
from evp.evp import EstimatedValuePredictor


def test_estimate_value():
    """Every lead in the database must yield a ``LeadValue`` estimate."""
    known_leads = get_database().get_all_leads()
    predictor = EstimatedValuePredictor()
    for known_lead in known_leads:
        estimate = predictor.estimate_value(known_lead.lead_id)
        assert type(estimate) is LeadValue
Loading

0 comments on commit 8107616

Please sign in to comment.