From ae8e083b5d1feded8fc172a8d565ef2553f37eca Mon Sep 17 00:00:00 2001
From: Temitayo Aderounmu <91757922+temitayoaderounmu@users.noreply.github.com>
Date: Sat, 25 Nov 2023 22:15:04 -0600
Subject: [PATCH] Delete MarketPriceComparison.py

---
 MarketPriceComparison.py | 771 ---------------------------------------
 1 file changed, 771 deletions(-)
 delete mode 100644 MarketPriceComparison.py

diff --git a/MarketPriceComparison.py b/MarketPriceComparison.py
deleted file mode 100644
index 0789e5c..0000000
--- a/MarketPriceComparison.py
+++ /dev/null
@@ -1,771 +0,0 @@
-# %%
-%%javascript
-IPython.OutputArea.prototype._should_scroll = function(lines) {
-    return false;
-}
-
-# %%
-# import necessary libraries
-import pandas as pd
-import numpy as np
-import math
-import seaborn as sns
-import datetime
-import matplotlib.pylab as plt
-import plotly.graph_objects as go
-from pathlib import Path
-from matplotlib import pyplot
-from sklearn.linear_model import LinearRegression
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import RandomForestRegressor
-from matplotlib.ticker import NullFormatter
-import matplotlib.ticker as ticker
-from sklearn.gaussian_process import GaussianProcessRegressor
-from sklearn.gaussian_process.kernels import WhiteKernel, DotProduct, RBF
-from sklearn.metrics import mean_squared_error
-from sklearn.neighbors import KNeighborsRegressor
-%matplotlib inline
-
-# %%
-from pymongo import MongoClient
-import pymongo
-# MongoDB Atlas connection string
-# Replace '<your_connection_string>' with your actual connection string
-connection_string = '<your_connection_string>'
-try:
-    # Connect to MongoDB Atlas
-    client = MongoClient(connection_string)
-# return a friendly error if a URI error is thrown 
-except pymongo.errors.ConfigurationError:
-    print("An Invalid URI host error was received. Is your Atlas host name correct in your connection string (found the .env)?")
-
-#print(client.list_database_names())
-
-
-
-# %%
-database_name = 'scrape'
-db = client[database_name]
-#print(db.list_collection_names())
-
-
-# %%
-collection_name = 'scraped_raw'
-collection = db[collection_name]
-
-# Fetch data from MongoDB and convert it to a DataFrame
-cursor = collection.find()
-data = list(cursor)
-cars = pd.DataFrame(data)
-
-# Convert DataFrame to CSV
-csv_filename = 'output_data.csv'
-cars.to_csv(csv_filename, index=False)
-
-# Close the MongoDB connection
-client.close()
-
-print(f'Data has been successfully exported to {csv_filename}')
-
-
-
-# %%
-# Specify the database and collection
-# Replace '<your_database>' and '<your_collection>' with your actual database and collection names
-database_name = 'scrape'
-collection_name = 'scraped_processsed'
-db = client[database_name]
-collection = db[collection_name]
-
-# Fetch data from MongoDB and convert it to a DataFrame
-cursor = collection.find()
-data = list(cursor)
-cars = pd.DataFrame(data)
-
-# Convert DataFrame to CSV
-csv_filename = 'output_data.csv'
-cars.to_csv(csv_filename, index=False)
-
-# Close the MongoDB connection
-client.close()
-
-print(f'Data has been successfully exported to {csv_filename}')
-
-
-# %%
-cars = pd.read_csv('output_data.csv')
-
-# %%
-#cars = pd.read_csv('data.csv')
-
-# %%
-cars.head()
-
-# %%
-cars.shape[0]
-
-# %% [markdown]
-# ## Data Cleaning
-
-# %%
-# drop unnecessary variables: id, url, region_url, VIN, image_url, description, lat, and long
-cars = cars.drop(columns = ['_id', 'link'])
-
-# %%
-cars.head()
-
-# %%
-# look at the number of NA values by column
-cars.isna().sum()
-
-# %%
-#cars = cars.drop(columns = ['images/23','images/22','images/21','images/20','images/19','images/18','images/17','images/16','images/15','images/14','images/13','images/12','images/11','images/10','images/9','images/8', 'images/7','images/6','images/5','images/4','images/3','images/2','images/1'])
-
-# %%
-cars.head()
-
-# %%
-cars.shape[0]
-
-# %%
-# drop NA values
-cars = cars.dropna()
-
-# %%
-cars.shape[0]
-
-# %%
-cars.head()
-
-# %%
-# convert year and odometer to integer values
-#cars['odometer'] = cars['odometer'].str.replace('K', '').str.replace(' miles', '').astype(int) * 1000
-#cars['odometer'] = cars['odometer'].str.replace('K miles', '').str.replace(' miles ยท Dealership', '').astype(float).astype(int) * 1000
-import re
-cars['odometer'] = cars['odometer'].apply(lambda x: int(re.search(r'\d+', str(x)).group(0)) if re.search(r'\d+', str(x)) else None)
-cars['year'] = cars['title'].str.extract(r'(\d{4})')
-# Fill NaN values in the 'year' column with a placeholder (you can choose a value that makes sense)
-cars['year'].fillna(-1, inplace=True)
-cars = cars.astype({'year':'int', 'odometer':'int'})
-
-# %%
-# create an age variable to get a better understanding of how old a car is
-cars['age'] = datetime.date.today().year - cars['year']
-
-# %%
-cars.head()
-
-# %% [markdown]
-# ## Data Exploration
-
-# %%
-sns.histplot(data = cars, x = 'age', binwidth = 5).set(title = 'Distribution of Car Ages')
-
-# %%
-cars[cars['year'] < 1990]
-
-# %%
-cars = cars[cars.year >= 2005]
-
-# %%
-cars.shape[0]
-
-# %%
-# looking again at the distribution of ages
-sns.histplot(data = cars, x = 'age', binwidth = 1).set(title = 'Distribution of Car Ages')
-
-# %%
-cars.shape[0]
-
-# %%
-sns.histplot(data = cars, x = 'odometer').set(title = 'Distribution of Car Odometer')
-
-# %%
-cars[cars['odometer'] > 300]
-
-# %%
-cars = cars[cars.odometer <= 300]
-# Clean up the 'price' column by replacing non-numeric values with NaN
-cars['price'] = pd.to_numeric(cars['price'].replace('[\$,]', '', regex=True), errors='coerce')
-
-# Fill NaN values in the 'price' column with a placeholder (you can choose a value that makes sense)
-cars['price'].fillna(-1, inplace=True)
-
-# Convert 'price' column to integers
-cars['price'] = cars['price'].astype(int)
-
-
-# %%
-cars.shape[0]
-
-# %%
-sns.histplot(data = cars, x = 'odometer').set(title = 'Distribution of Car Odometer')
-
-# %%
-sns.scatterplot(data = cars, x = 'odometer', y = 'price').set(title = 'Correlation of Vehicle Odometer and List Price')
-
-# %%
-cars[cars['price'] > 100000]
-
-# %%
-cars = cars[cars.price <= 100000]
-
-# %%
-cars.shape[0]
-
-# %%
-# look at the odometer vs price now
-sns.scatterplot(data = cars, x = 'odometer', y = 'price').set(title = 'Correlation of Vehicle Odometer and List Price')
-
-# %%
-sns.histplot(data = cars, x = 'price').set(title = 'Distribution of List Price')
-
-# %%
-cars[cars['price'] < 1000]
-
-# %%
-cars = cars[cars.price >= 1000]
-
-# %%
-sns.histplot(data = cars, x = 'price').set(title = 'Distribution of List Price')
-
-# %%
-cars.shape[0]
-
-# %%
-#Dictionary of car makes
-kbb_make = {
-    'acura',
-    'alfa-romeo',
-    'aston-martin',
-    'audi',
-    'bentley',
-    'bmw',
-    'buick',
-    'cadillac',
-    'chevrolet',
-    'chrysler',
-    'dodge',
-    'ferrari',
-    'fiat',
-    'ford',
-    'genesis',
-    'gmc',
-    'honda',
-    'hyundai',
-    'infiniti',
-    'jaguar',
-    'jeep',
-    'kia',
-    'lamborghini',
-    'land rover',
-    'lexus',
-    'lincoln',
-    'lucid',
-    'maserati',
-    'mazda',
-    'mclaren',
-    'mercedes-benz',
-    'mini',
-    'mitsubishi',
-    'nissan',
-    'polestar',
-    'porsche',
-    'ram',
-    'rivian',
-    'rolls-royce',
-    'subaru',
-    'tesla',
-    'toyota',
-    'volkswagen',
-    'volvo',
-}
-
-# Dictionary to store car models for each make
-kbb_models = {
-    'acura': ['ILX', 'MDX', 'NSX','RDX','TLX'],
-    'alfa-romeo': ['Giulia', 'Stelvio'],
-    'aston-martin': ['DB11', 'DBS', 'DBX','Vantage'],
-    'audi': ['A3', 'A4','A4 allroad','A5', 'A6','A6 allroad',
-             'A7','A8','e-tron','e-tron GT','e-tron S',
-             'e-tron S Sportback','e-tron Sportback','Q3',
-             'Q4 e-tron','Q4 Sportback e-tron','Q5', 
-             'Q5 Sportback','Q7','Q8','R8','RS 3','RS 5',
-             'RS 6','RS 7', 'RS e-tron GT','RS Q8','S3',
-             'S4','S5','S6','S7','S8','SQ5','SQ5 Sportback',
-             'SQ7','SQ8','TT'],
-    'bentley': ['Bentayga','Continental GT','Flying Spur'],
-    'bmw': ['2 Series','3 Series','4 Series', '5 Series',
-            '6 Series','7 Series','8 Series','i4','iX','M3',
-            'M4','M5','M8','X1','X2','X3','X3 M','X4','X4 M','X5',
-            'X5 M','X6','X6 M','X7','Z4'],
-    'buick': ['Enclave','Encore','Encore GX','Envision'],
-    'cadillac': ['CT4','CT5','Escalade','Escalade ESV','XT4','XT5','XT6'],
-    'chevrolet': ['Blazer','Bolt EUV','Bolt EV','Camaro','Colorado Crew Cab',
-                  'Colorado Extended Cab','Corvette','Equinox','Express 2500 Cargo',
-                  'Express 2500 Passenger','Express 3500 Cargo','Express 3500 Passenger',
-                  'Malibu','Silverado 1500 Crew Cab','Silverado 1500 Double Cab',
-                  'Silverado 1500 Regular Cab','Silverado 1500 Limited Crew Cab',
-                  'Silverado 1500 Limited Double Cab','Silverado 1500 Limited Regular Cab',
-                  'Silverado 2500 HD Crew Cab','Silverado 2500 HD Double Cab',
-                  'Silverado 2500 HD Regular Cab','Silverado 3500 HD Crew Cab',
-                  'Silverado 3500 HD Double Cab','Silverado 3500 HD Regular Cab',
-                  'Spark','Suburban','Tahoe','Trailblazer','Traverse','Trax',
-                  'Avalanche','Cruze','Impala','Lumina','Malibu Maxx','Monte Carlo','SS','Uplander','Venture'],
-    'chrysler': ['300','Pacifica','Pacifica Hybrid','Voyager'],
-    'dodge': ['Charger', 'Challenger', 'Durango'],
-    'ferrari': ['296 GTB','812 Competizione','812 Competizione A','812 GTS',
-                'F8', 'Portofino', 'SF90', 'Roma'],
-    'fiat': ['500X'],
-    'ford': ['Bronco', 'Bronco Sport','E-Transit 350 Cargo Van','EcoSport', 'Edge', 
-             'Escape','Escape Plug-in Hybrid', 'Expedition','Expedition MAX', 
-             'Explorer', 'F150 Lightning', 'F150 Regular Cab','F150 Super Cab',
-             'F150 SuperCrew Cab', 'F250 Super Duty Crew Cab','F250 Super Duty Regular Cab',
-             'F250 Super Duty Super Cab', 'F350 Super Duty Crew Cab','F350 Super Duty Regular Cab',
-             'F350 Super Duty Super Cab', 'F450 Super Duty Crew Cab','F450 Super Duty Regular Cab', 
-             'Maverick', 'Mustang','Mustang MACH-E', 'Ranger SuperCab','Ranger SuperCrew', 
-             'Transit 150 Cargo Van', 'Transit 150 Crew Van', 'Transit 150 Passenger Van',              
-             'Transit 250 Cargo Van', 'Transit 250 Crew Van', 'Transit 350 HD Cargo Van', 'Transit 350 HD Crew Van',
-             'Transit 350 Cargo Van', 'Transit 350 Crew Van', 'Transit 350 Passenger Van', 
-             'Transit Connect Cargo Van', 'Transit Connect Passenger Wagon','C-Max Hybrid','C-Max Energi',
-             'Fiesta','Flex','Focus','Fusion','Mustang Mach-E','Taurus','Thunderbird'],
-    'genesis': ['G70', 'G80','G90', 'GV70','GV80'],
-    'gmc': ['Acadia','Canyon Crew Cab','Canyon Extended Cab','HUMMER EV Pickup','Savana 2500 Cargo','Savana 2500 Passenger', 
-            'Savana 3500 Cargo','Savana 3500 Passenger','Sierra 1500 Crew Cab', 'Sierra 1500 Double Cab','Sierra 1500 Regular Cab',
-            'Sierra 1500 Limited Crew Cab', 'Sierra 1500 Limited Double Cab', 'Sierra 1500 Limited Regular Cab', 
-            'Sierra 2500 HD Crew Cab','Sierra 2500 HD Double Cab','Sierra 2500 HD Regular Cab', 'Sierra 3500 HD Crew Cab',
-            'Sierra 3500 HD Double Cab','Sierra 3500 HD Regular Cab', 'Yukon', 'Yukon XL', 'Terrain','C1500 Pickup','C2500 Pickup',
-            'C3500 Pickup','Envoy','Envoy XL','Envoy XUV','Jimmy','R/V 3500 Series','S15 Jimmy','Safari','Savana 1500 Cargo',
-            'Savana 1500 Passenger','Savana 2500 Cargo','Savana 2500 Passenger','Savana 3500 Cargo',
-            'Savana 3500 Passenger','Sierra 1500 Classic Crew Cab','Sierra 1500 Classic Double Cab',
-            'Sierra 1500 Classic Regular Cab','Sierra 1500 HD Classic Crew Cab','Sierra 1500 HD Classic Double Cab',
-            'Sierra 1500 HD Classic Regular Cab','Sierra 2500 Classic Crew Cab','Sierra 2500 Classic Double Cab',
-            'Sierra 2500 Classic Regular Cab','Sierra 3500 Classic Crew Cab','Sierra 3500 Classic Double Cab',
-            'Sierra 3500 Classic Regular Cab','Sonoma','Suburban 1500','Suburban 2500','Syclone','Typhoon',
-            'Vandura 1500','Vandura 2500','Vandura 3500','Yukon XL 1500','Yukon XL 2500'],
-    'honda': ['Accord', 'Accord Hybrid', 'Civic', 'CR-V', 'CR-V Hybrid', 'HR-V', 'Insight', 'Odyssey', 'Passport', 'Pilot',
-              'Fit','Element','Prelude','S2000','Crosstour','Ridgeline'],
-    'hyundai': ['Accent', 'Elantra', 'Elantra N' , 'IONIQ5', 'Ioniq Hybrid','Ioniq Plug-in Hybrid','Kona', 'Kona Electric', 'Kona N',
-                'Palisade', 'Santa Fe', 'Santa Fe Hybrid', 'Santa Fe Plug-in Hybrid','Sonata', 'Sonata Hybrid', 'Tucson', 'Tucson Hybrid', 
-                'Tucson Plug-in Hybrid', 'Venue', 'Veloster', 'Santa Cruz','Azera','Equus','Genesis Coupe','Genesis G70','Genesis G80',
-                'Genesis G90','Santa Fe Sport','Santa Fe XL','Veracruz'],
-    'infiniti': ['Q50', 'Q60','QX50','QX55','QX60','QX80','EX35','EX37','FX35','FX37','FX45','FX50',
-                'G25','G35','G37','I30','I35','JX35',
-                'M35','M35h','M37','M45','M56','Q40',
-                'Q45','QX4','QX56','QX60 Hybrid',
-                'QX70',],
-    'jaguar': ['XF', 'E-PACE','F-PACE','F-TYPE', 'I-PACE','S-Type','X-Type','XE','XE SV Project 8','XJ','XJR',
-                'XJR-S','XJS','XK','XK-Series'],
-    'jeep': ['Cherokee', 'Compass', 'Gladiator', 'Grand Cherokee', 'Grand Cherokee 4xe', 'Grand Cherokee L', 'Grand Wagoneer','Renegade', 
-             'Wagoneer','Wrangler','Wrangler Unlimited', 'Wrangler Unlimited 4xe','CJ-5','CJ-7','CJ-8 Scrambler','Commander','Comanche','Dispatcher',
-            'FC-150','FC-170','FC-170 DRW','Gladiator (JT)','Grand Cherokee SRT',
-            'Grand Cherokee SRT8','Grand Cherokee Trackhawk','Grand Wagoneer (SJ)',
-            'J-100','J-2500','J-2600','J-2700','J-2800','J-3500','J-3600',
-            'J-3700','J-3800','J10','J20','Jeepster','Jeepster Commando','Liberty',
-            'Patriot','Scrambler','Wagoneer (SJ)','Wagoneer (XJ)','Willys','Wrangler (JK)',
-            'Wrangler (LJ)','Wrangler (TJ)','Wrangler (YJ)'],
-    'kia': ['Cadenza', 'Forte', 'K5', 'K900', 'Niro', 'Rio', 'Seltos', 'Sorento', 'Soul', 'Sportage', 'Stinger', 
-            'Telluride','Amanti','Borrego','Forte5','K900','Optima','Optima Hybrid','Rondo','Sedona',
-            'Sephia','Sorento Sport','Soul EV','Spectra','Sportage Hybrid','Telluride Nightfall Edition'],
-    'lamborghini': ['Aventador', 'Huracan', 'Urus','Gallardo','Centenario','Diablo','Gallardo Spyder','Huracan Evo','Huracan Evo Spyder',
-            'Huracan Performante','Huracan Performante Spyder','Huracan Spyder',
-            'Murcielago','Murcielago Roadster','Reventon','Urus Graphite Capsule'],
-    'land rover': ['LR2','LR4','Defender', 'Discovery', 'Discovery Sport', 'Range Rover', 'Range Rover Evoque', 'Range Rover Sport', 'Range Rover Velar',
-                   'Discovery Series II','Discovery Series II SD','Discovery Sport SD',
-                    'Discovery Series II SE7','Discovery Series II XD','Freelander',
-                    'Freelander SE','Freelander SE3','LR2 HSE','LR3','LR3 HSE','LR3 SE',
-                    'LR4 HSE','LR4 HSE LUX','LR4 V8','LR4 V8 HSE LUX','Range Rover 4.0 SE',
-                    'Range Rover 4.6 HSE','Range Rover HSE','Range Rover Velar R-Dynamic',
-                    'Range Rover Velar S','Range Rover Velar SE','Range Rover Velar SVAutobiography Dynamic',
-                    'Range Rover Westminster','Range Rover Westminster Edition','Range Rover 4.6 HSE',
-                    'Range Rover Autobiography','Range Rover Autobiography Black','Range Rover Autobiography Black LWB',
-                    'Range Rover Autobiography L','Range Rover Autobiography LWB',
-                    'Range Rover HSE','Range Rover HSE LUX','Range Rover Long Wheelbase','Range Rover SE',
-                    'Range Rover Sport','Range Rover Sport 5.0L V8 Supercharged','Range Rover Sport GT Limited Edition',
-                    'Range Rover Sport HSE','Range Rover Sport HST','Range Rover Sport Limited Edition','Range Rover Sport SC',
-                    'Range Rover Sport SE','Range Rover Sport Supercharged','Range Rover Sport SVR',
-                    'Range Rover Velar','Range Rover Velar First Edition','Range Rover Velar P250 S','Range Rover Velar P250 SE',
-                    'Range Rover Velar P250 R-Dynamic S','Range Rover Velar P250 R-Dynamic SE','Range Rover Velar P250 R-Dynamic HSE',
-                    'Range Rover Velar P340 S','Range Rover Velar P340 SE','Range Rover Velar P340 R-Dynamic S',
-                    'Range Rover Velar P340 R-Dynamic SE','Range Rover Velar P380 R-Dynamic HSE','Range Rover 4.0 SE',
-                    'Range Rover Autobiography','Range Rover Autobiography Black',
-                    'Range Rover Autobiography Black LWB','Range Rover Autobiography L',
-                    'Range Rover Autobiography LWB','Range Rover HSE','Range Rover HSE LUX',
-                    'Range Rover Long Wheelbase','Range Rover SE'],
-    'lexus': ['ES', 'GS', 'GX', 'IS', 'LS', 'LX', 'NX', 'RC', 'RX', 'UX','CT','ES 250','ES 300','ES 300h','ES 330','ES 350','GS 200t','GS 300',
-            'GS 350','GS 400','GS 430','GS 450h','GS 460','GS F','GX 460','GX 470','HS 250h','IS 200t','IS 250',
-        'IS 300','IS 350','IS 350C','LC 500','LC 500h','LFA','LS 400',
-        'LS 430','LS 460','LS 500','LS 500h','LS 600h','LX 450','LX 470',
-        'LX 570','NX 200t','NX 300','NX 300h','RC 200t','RC 300','RC 350',
-        'RC F','RX 300','RX 330','RX 350','RX 350L','RX 400h','RX 450h',
-        'RX 450hL','SC 300','SC 400','SC 430','UX 200','UX 250h',
-        'GS 300','GS 350','GS 450h','GS F','IS 200t','IS 300',
-        'LC 500','LC 500h','LS 500','LS 500h','LX 570','NX 200t',
-        'NX 300','RC 300','RC 350','RC F','RX 350','RX 450h',
-        'ES 250','ES 300','ES 300h','ES 330','ES 350','GS 200t',
-        'GS 300','GS 350','GS 400','GS 430','GS 450h','GS F','GX 460',
-        'GX 470','HS 250h','IS 200t','IS 250','IS 300','IS 350',
-        'IS 350C','LC 500','LC 500h','LFA','LS 400','LS 430',
-        'LS 460','LS 500','LS 500h','LS 600h','LX 450','LX 470',
-        'LX 570','NX 200t','NX 300','NX 300h','RC 200t','RC 300',
-        'RC 350','RC F','RX 300','RX 330','RX 350','RX 350L',
-        'RX 400h','RX 450h','RX 450hL','SC 300','SC 400','SC 430',
-        'UX 200','UX 250h'],
-    'lincoln': ['Aviator', 'Continental', 'Corsair', 'MKS','MKZ','MKT','MKX', 'Nautilus', 'Navigator', 'Navigator L','Blackwood','Capri','Continental Mark III','Continental Mark IV',
-                'Continental Mark V','Continental Mark VI','Corsair','LS','Mark LT',
-                'Mark VI','Mark VII','Mark VIII','MKS','MKT','MKX','MKZ',
-                'Nautilus','Navigator','Navigator L','Town Car','Versailles',
-                'Zephyr'],
-    'lucid': ['Air','Air Dream Edition','Air Grand Touring','Air Pure','Air Touring'],
-    'maserati': ['Ghibli', 'GranTurismo','Levante','MC20', 'Quattroporte','430','GranSport','GranTurismo MC','Levante GTS','Levante S',
-                'Quattroporte GTS','Quattroporte S','Quattroporte Trofeo'],
-    'mazda': ['CX-30', 'CX-5', 'CX-9', 'Mazda3', 'Mazda6', 'MX-5 Miata','323','626','929','B-Series Pickup','CX-3','CX-7','CX-9',
-                'GLC','Mazda2','Mazda3 Sport','Mazda5','Mazda6 Sport',
-                'MAZDASPEED MX-5','MAZDASPEED Protege','MAZDASPEED3',
-                'MAZDASPEED6','Millenia','MPV','MX-3','MX-5 Miata RF','MX-6',
-                'Navajo','Protege','Protege5','RX-7','RX-8','Tribute'],
-                'mclaren': ['570S', '600LT', '720S', '765LT','GT',
-                '570S Spider','600LT Spider','620R','675LT','675LT Spider',
-                '720S Spider','765LT Spider','GT','GT Coupe','GT Spider',
-                'MP4-12C','MP4-12C Spider','P1'],
-    'mclaren': ['570S', '600LT', '720S', '765LT','GT'],
-    'mercedes-benz': ['A-Class', 'C-Class', 'E-Class', 'G-Class', 'GLA', 'GLC', 'GLE', 'GLS', 'S-Class','190-Class','260-Class','300-Class','350-Class','380-Class','400-Class','420-Class',
-                '450-Class','500-Class','560-Class','600-Class','AMG GT','B-Class','C-Class Coupe',
-                'C-Class Wagon','CL-Class','CLA','CLA-Class','CLK-Class','CLS','CLS-Class',
-                'E-Class Coupe','E-Class Wagon','G-Class','GL-Class','GLA-Class','GLB','GLB-Class',
-                'GLC Coupe','GLC-Class','GLE Coupe','GLE-Class','GLK-Class','GLS-Class',
-                'M-Class','Maybach S-Class','Metris','R-Class','S-Class Coupe',
-                'S-Class Maybach','SL-Class','SLC-Class','SLK-Class','SLR McLaren','SLS AMG',
-                'Sprinter'],
-    'mini': ['Clubman', 'Convertible', 'Countryman', 'Hardtop','Cooper','Cooper Clubman','Cooper Countryman','Cooper Paceman',
-        'Hardtop 2 Door','Hardtop 4 Door','John Cooper Works',
-        'John Cooper Works Clubman','John Cooper Works Convertible',
-        'John Cooper Works Countryman','John Cooper Works Hardtop',
-        'John Cooper Works Hardtop 2 Door','John Cooper Works Hardtop 4 Door',
-        'John Cooper Works Paceman'],
-    'mitsubishi': ['Outlander', 'Eclipse Cross', 'Pajero','3000GT','Diamante','Eclipse','Eclipse Spyder','Endeavor','Expo',
-                'Galant','Lancer','Mighty Max','Mirage','Montero','Montero Sport',
-                'Outlander PHEV','Outlander Sport','Raider','Sigma','Van'],
-    'nissan': ['370Z', 'Altima', 'Armada', 'Frontier', 'Kicks', 'LEAF', 'Maxima', 'Murano', 'NV Cargo', 'NV Passenger', 'NV200', 'Pathfinder', 'Rogue', 'Rogue Sport', 'Titan', 'Versa',
-               '200SX','240SX','300ZX','350Z','370Z Coupe','370Z NISMO','370Z Roadster',
-                'Altima Hybrid','Armada','Cube','Frontier King Cab','Frontier Crew Cab',
-                'GT-R','Kicks','LEAF','Maxima','Murano','Murano CrossCabriolet','NV Cargo','NV Passenger','NV200',
-                'Pathfinder','Quest','Rogue','Rogue Select','Rogue Sport',
-                'Sentra','Titan King Cab','Titan Crew Cab','Titan XD',
-                'Versa','Versa Note','Versa Sedan','Xterra','Altima','Armada',
-                'Cube','Frontier','GT-R','JUKE','Kicks','LEAF','Maxima',
-                'Murano','Murano CrossCabriolet','NV Cargo','NV Passenger',
-                'NV200','Pathfinder','Quest','Rogue','Rogue Select','Rogue Sport',
-                'Sentra','Titan'],
-    'polestar': ['2', '1', '3'],
-    'porsche': ['911', 'Cayenne', 'Macan', 'Panamera', 'Taycan','718 Cayman','911 Carrera','911 Carrera 4','911 Carrera 4 Cabriolet',
-                '911 Carrera 4 GTS','911 Carrera 4S','911 Carrera 4S Cabriolet',
-                '911 Carrera Cabriolet','911 Carrera GTS','911 Carrera S',
-                '911 Carrera S Cabriolet','911 Targa','911 Targa 4',
-                '911 Targa 4 GTS','911 Targa 4S','911 Turbo','911 Turbo Cabriolet',
-                '911 Turbo S','911 Turbo S Cabriolet','918 Spyder','Boxster',
-                'Carrera GT','Cayenne','Cayenne Coupe','Cayenne E-Hybrid','Cayenne GTS',
-                'Cayenne S','Cayenne S E-Hybrid','Cayenne Turbo','Cayenne Turbo S E-Hybrid',
-                'Cayman','Macan','Panamera','Panamera 4','Panamera 4 E-Hybrid',
-                'Panamera 4 Executive','Panamera 4S','Panamera 4S E-Hybrid',
-                'Panamera Executive','Panamera GTS','Panamera S','Panamera Turbo',
-                'Panamera Turbo Executive','Panamera Turbo S E-Hybrid'],
-    'ram': ['1500', '2500', '3500','1500 Classic','1500 Classic Crew Cab','1500 Classic Quad Cab',
-        '1500 Classic Regular Cab','2500','3500','3500 Chassis Cab',
-        '4500 Chassis Cab','5500 Chassis Cab','C/V','Dakota',
-        'ProMaster Cargo Van','ProMaster Chassis Cab','ProMaster City Cargo',
-        'ProMaster City Wagon','ProMaster Cutaway','ProMaster Window Van'],
-    'rivian': ['R1T', 'R1S'],
-    'rolls-royce': ['Cullinan', 'Dawn', 'Ghost', 'Phantom', 'Wraith','Cullinan Black Badge','Dawn','Dawn Black Badge','Ghost',
-                'Ghost Black Badge','Phantom','Phantom Drophead Coupe',
-                'Phantom Drophead Coupe Waterspeed Collection','Phantom Coupe',
-                'Phantom Coupe Aviator Collection','Phantom Coupe Waterspeed Collection',
-                'Phantom Coupe Tiger Edition','Wraith','Wraith Black Badge'],
-    'subaru': ['Ascent', 'BRZ', 'Crosstrek', 'Forester', 'Impreza', 'Legacy', 'Outback', 'WRX',
-               'Ascent','Baja','BRAT','BRZ','Crosstrek','Crosstrek Hybrid',
-                'Forester','Impreza','Impreza Outback Sport','Impreza WRX',
-                'Legacy','Outback','SVX','Tribeca','WRX','XT','XT6'],
-    'tesla': ['Model 3', 'Model S', 'Model X', 'Model Y'],
-    'toyota': ['4Runner', 'Avalon', 'C-HR', 'Camry', 'Corolla', 'Highlander', 'Land Cruiser', 'Prius', 'RAV4', 
-               'Sequoia', 'Sienna', 'Tacoma', 'Tundra', 'Venza', 'Yaris', 'Corolla Hatchback',
-                'Corolla Hybrid','Corolla iM','FJ Cruiser','GR86','Highlander',
-                'Highlander Hybrid','Land Cruiser','Matrix','Mirai','MR2',
-                'MR2 Spyder','Paseo','Previa','Prius','Prius c','Prius Plug-in',
-                'Prius Prime','Prius Prime Advanced','Prius Prime Premium',
-                'Prius v','RAV4','RAV4 Prime','Sequoia','Sienna',
-                'Supra','T100','Tacoma','Tercel','Tundra','Venza',
-                'Yaris','Yaris iA','4Runner','Avalon','Avalon Hybrid',
-                'C-HR','Camry','Camry Hybrid','Camry Solara','Celica',
-                'Corolla','Corolla Hatchback','Corolla Hybrid','Corolla iM',
-                'FJ Cruiser','GR86','Highlander','Highlander Hybrid',
-                'Land Cruiser','Matrix','Mirai','MR2','MR2 Spyder',
-                'Paseo','Previa','Prius','Prius c','Prius Plug-in',
-                'Prius Prime','Prius Prime Advanced','Prius Prime Premium',
-                'Prius v','RAV4','RAV4 Prime','Sequoia','Sienna',
-                'Supra','T100','Tacoma','Tercel','Tundra','Venza',
-                'Yaris','Yaris iA'],
-    'volkswagen': ['Arteon', 'Atlas', 'Golf', 'Jetta', 'Passat', 'Tiguan',
-                   'Atlas','Atlas Cross Sport','Beetle','Cabrio',
-                'CC','e-Golf','EuroVan','Fox','GLI','Golf','Golf Alltrack',
-                'Golf GTI','Golf R','Golf SportWagen','ID.4','Jetta','Jetta GLI',
-                'Karmann Ghia','Microbus','Passat','Quantum','R32','Rabbit',
-                'Routan','Tiguan','Touareg','Touareg 2'],
-    'volvo': ['S60', 'S90', 'V60', 'V90', 'XC40', 'XC60', 'XC90','240','740','760','780','850','940','960','C30',
-                'C40 Recharge','C70','S40','S60 Cross Country','S60 Recharge',
-                'S70','S80','S90 Recharge','V40','V50','V60','V60 Cross Country',
-                'V60 Recharge','V70','V90','V90 Cross Country','V90 Recharge',
-                'XC40','XC40 Recharge','XC60','XC60 Recharge','XC70',
-                'XC90','XC90 Recharge'],
-}
-
-# %%
-
-from difflib import get_close_matches
-
-title = cars['title']
-
-# Function to extract make from title
-def extract_make(title):
-    title_lower = title.lower()
-    for make in kbb_make:
-        if make in title_lower:
-            return make
-        elif (len(make) >= 4 and make[:4] in title_lower):
-            return make
-    return None
-
-def extract_model_wreg(title, make):
-    title_lower = title.lower()
-
-    # Check if the make is in the kbb_models dictionary
-    make_models = kbb_models.get(make, [])
-
-    # Use regex to find patterns like "2021 RAM 3500" in the title
-    match = re.search(r'\b\d{4}\s*[a-zA-Z0-9-]+\s*([a-zA-Z0-9-]+)\b', title)
-
-    if match:
-        # Extracted model is in the first capturing group
-        model = match.group(1)
-
-        if model is not None:
-            # Handle models with dashes
-            model = model.replace("-", "")
-
-            # Compare with the kbb_models dictionary
-            matched_model = get_close_matches(model, make_models, n=1)
-
-            if matched_model:
-                return matched_model[0]
-            else:
-                # If no direct match, try finding a close match using pieces of words
-                title_words = re.findall(r'\b\w+\b', title)
-                extracted_model_pieces = []
-
-                for word in title_words:
-                    # Check if the word is part of the make name, if yes, skip it
-                    if make is not None and word.lower() in make.lower():
-                        continue
-
-                    extracted_model_pieces.append(word)
-                    current_model_attempt = ' '.join(extracted_model_pieces)
-
-                    # Check if the current attempt is a close match
-                    matched_model = get_close_matches(current_model_attempt, make_models, n=1)
-                    if matched_model:
-                        return matched_model[0]
-
-                # If still no match, return the original extracted model
-                return model
-        else:
-            return None
-    else:
-        return None
-
-# Apply the extraction functions
-cars['manufacturer'] = cars['title'].apply(extract_make)
-cars['model'] = cars.apply(lambda row: extract_model_wreg(row['title'], row['manufacturer']), axis=1)
-
-# Print the DataFrame with 'manufacturer' and 'model'
-result_df = cars[['manufacturer', 'model']]
-print(result_df)
-
-# Save the selected columns to CSV
-result_df.to_csv('output.csv', index=False)
-
-fig, ax = plt.subplots(figsize=(20, 10))
-sns.barplot(data=cars, x='manufacturer', y='price', estimator=np.median, ax=ax).set(title='Median List Price by Manufacturer')
-plt.xticks(rotation=45)
-
-# %%
-cars.groupby(['manufacturer'])['price'].median().sort_values(ascending = False)
-
-# %%
-cars['manufacturer'].value_counts()
-
-# %%
-cars.shape[0]
-
-# %%
-# Drop rows where either 'manufacturer' or 'model' is empty
-cars.dropna(subset=['manufacturer', 'model'], inplace=True)
-
-# Print the DataFrame with 'manufacturer' and 'model'
-result_df = cars[['manufacturer', 'model']]
-print(result_df)
-
-# Save the selected columns to CSV
-#result_df.to_csv('outputff.csv', index=False)
-
-
-# %%
-cars.shape[0]
-
-# %%
-numeric_columns = ['price', 'year', 'odometer','age']
-#cars.groupby(['title_status'])[numeric_columns].median()
-
-# %% [markdown]
-# ## Modeling
-
-# %% [markdown]
-# ### Segmentation
-
-# %%
-cars.head()
-
-# %%
-import requests
-from bs4 import BeautifulSoup
-from concurrent.futures import ThreadPoolExecutor
-import pandas as pd
-import numpy as np
-from difflib import get_close_matches
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-# Function to scrape current market price from Kelly Blue Book
-def get_kbb_price(row):
-    base_url = 'https://www.kbb.com/'
-    
-    # Check if vehicle_make and vehicle_model are not None or float
-    if isinstance(row['manufacturer'], str) and isinstance(row['model'], str):
-        # Replace spaces with dashes in the manufacturer and model for the URL
-        make_url_part = row["manufacturer"].lower().replace(" ", "-")
-        model_url_part = row["model"].lower().replace(" ", "-")
-        
-        search_url = f'{base_url}{make_url_part}/{model_url_part}/{row["year"]}/'
-        #print(search_url)
-        try:
-            response = requests.get(search_url)
-            soup = BeautifulSoup(response.text, 'html.parser')
-            # Extract relevant information (adjust based on the actual HTML structure)
-            # Extract the price information from the HTML code
-            #price_field = soup.find('div', {'class': 'nationalBaseDefaultPrice'})
-            #kbb_price = price_field['content'] if price_field else None
-
-            # Use regular expression to extract the price information
-            pattern = re.compile(r'"nationalBaseDefaultPrice":(\d+),')
-            match = pattern.search(response.text)
-            
-            kbb_price = match.group(1) if match else None
-
-            return kbb_price
-
-            #return kbb_price.text.strip() if kbb_price else None
-        except Exception as e:
-            print(f"Error: {e}")
-            return None
-    else:
-        return None
-
-
-# Use ThreadPoolExecutor to parallelize the scraping process
-with ThreadPoolExecutor(max_workers=5) as executor:
-    kbb_prices = list(executor.map(get_kbb_price, cars.to_dict(orient='records')))
-
-# Add the kbb_prices to the DataFrame
-cars['kbb_price'] = kbb_prices
-
-# Drop rows where kbb_price is empty
-cars = cars.dropna(subset=['kbb_price'])
-
-# Compare the actual market price with the dataset
-cars['price_difference'] = np.abs(cars['price'] - cars['kbb_price'].astype(float))
-
-# Print the results
-#print(cars[['manufacturer', 'model', 'year', 'price', 'kbb_price', 'price_difference']])
-
-# Flag rows with price difference greater than 10000 as fraudulent
-cars['fraudulent'] = np.where(cars['price_difference'] > 10000, True, False)
-
-# Print the results
-output_df = cars[['manufacturer', 'model', 'year', 'price', 'kbb_price', 'price_difference', 'fraudulent']]
-print(output_df)
-
-# Save the results to a CSV file
-output_csv_filename = 'price_comparison_results.csv'
-output_df.to_csv(output_csv_filename, index=False)
-print(f'Results have been saved to {output_csv_filename}')
-
-
-# %%
-# Visualize the comparison
-fig, ax = plt.subplots(figsize=(20, 10))
-sns.barplot(data=cars, x='manufacturer', y='price_difference', estimator=np.median, ax=ax).set(title='Median Price Difference by Manufacturer')
-plt.xticks(rotation=45)
-plt.show()
-
-# Visualize the average fraudulent status on a separate plot
-avg_fraudulent_status = cars.groupby('manufacturer')['fraudulent'].mean().reset_index()
-plt.figure(figsize=(12, 6))
-sns.barplot(data=avg_fraudulent_status, x='manufacturer', y='fraudulent', color='red').set(title='Average Fraudulent Status by Manufacturer')
-plt.xticks(rotation=45)
-plt.show()
-
-# %%
-cars['kbb_price'] = pd.to_numeric(cars['kbb_price'], errors='coerce')
-
-# Calculate the percentage difference between the actual price and KBB price
-cars['price_difference_percentage'] = ((cars['price'] - cars['kbb_price']) / cars['kbb_price']) * 100
-
-# Set a threshold for percentage difference, beyond which a vehicle is flagged as potentially fraudulent
-fraud_threshold = 10  # You can adjust this threshold based on your criteria
-
-# Create a new column 'fraudulent' to flag potentially fraudulent vehicles
-cars['fraudulent'] = np.abs(cars['price_difference_percentage']) > fraud_threshold
-
-# Print or analyze the flagged vehicles
-fraudulent_vehicles = cars[cars['fraudulent']]
-print(fraudulent_vehicles[['manufacturer', 'model', 'year', 'price', 'kbb_price', 'price_difference_percentage']])
-
-# %%
-'''
-#from sklearn.model_selection import train_test_split
-#from sklearn.ensemble import IsolationForest
-#from sklearn.preprocessing import StandardScaler
-#from sklearn.metrics import classification_report
-
-#features = ['year', 'odometer']
-#target = 'price'
-data = cars[features + [target]].copy()
-
-train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
-
-scaler = StandardScaler()
-train_data[features] = scaler.fit_transform(train_data[features])
-test_data[features] = scaler.transform(test_data[features])
-
-model = IsolationForest(contamination=0.05, random_state=42)
-model.fit(train_data[features])
-
-test_data['anomaly_score'] = model.decision_function(test_data[features])
-
-threshold = -0.2
-
-test_data['anomaly'] = test_data['anomaly_score'] < threshold
-
-print(classification_report(test_data[target], ~test_data['anomaly']))
-'''
-
-
-