From 6d1fddc59228993288515c849889d3191cdd0f76 Mon Sep 17 00:00:00 2001
From: Ozlem <cse.ozlemkorpe@gmail.com>
Date: Mon, 10 Aug 2020 17:05:58 +0300
Subject: [PATCH] KMeans updated

---
 KMeans_Guide.asv | 28 ++++++++++++++++++++++++++++
 KMeans_Guide.m   | 22 +++++++++++++++++++++-
 2 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 KMeans_Guide.asv

diff --git a/KMeans_Guide.asv b/KMeans_Guide.asv
new file mode 100644
index 0000000..1cbb836
--- /dev/null
+++ b/KMeans_Guide.asv
@@ -0,0 +1,28 @@
+%%%------------- K-Means Clustering
+% Step 1: Choose the number k of clusters
+
+% Step 2: Select k points at random to act as the initial centroids
+
+% Step 3: Assign each datapoint to the nearest centroid, which leads to k
+% clusters
+
+% Step 4: Compute the new centroid of each cluster based on the datapoints it
+% contains
+
+% Step 5: Reassign each datapoint to the new closest centroid
+
+% Choosing the k value: search for WCSS. "The Elbow Method"
+
+% Import the dataset
+data = readtable('Datasets\Mall_Customers.csv');
+
+% Check for missing values (per-column count of missing entries)
+missings = sum(ismissing(data));
+
+% Plot variables to check for outliers
+IncomePlot = plot(data.AnnualIncome);
+SpendingPlot = plot(data.SpendingScore);
+
+% Perform Feature Scaling (Standardization Method). NOTE(review): the last assignment below is cut off mid-identifier (stand_I).
+stand_income = (data.AnnualIncome - mean(data.AnnualIncome)) / std(data.AnnualIncome);
+data.AnnualIncome = stand_I
\ No newline at end of file
diff --git a/KMeans_Guide.m b/KMeans_Guide.m
index b192237..df11939 100644
--- a/KMeans_Guide.m
+++ b/KMeans_Guide.m
@@ -13,5 +13,25 @@
 
 % Choosing the k value, search for WCSS. "The Elbow Method"
 
-
+% Import the dataset (note: the '\' separator ties this relative path to Windows)
 data = readtable('Datasets\Mall_Customers.csv');
+
+% Count the missing entries in each table variable (not used further in this section)
+missings = sum(ismissing(data));
+
+% Plot each variable in its own figure to check for outliers; without a new figure the second plot replaces the first
+figure; IncomePlot = plot(data.AnnualIncome);
+figure; SpendingPlot = plot(data.SpendingScore);
+
+% Perform Feature Scaling (Standardization Method): subtract the mean, then divide by the standard deviation
+stand_income = (data.AnnualIncome - mean(data.AnnualIncome)) / std(data.AnnualIncome);
+data.AnnualIncome = stand_income;
+
+stand_spending = (data.SpendingScore - mean(data.SpendingScore)) / std(data.SpendingScore);
+data.SpendingScore = stand_spending;
+
+% Select the two standardized columns for clustering, by name rather than by position
+selected_data = data(:, {'AnnualIncome', 'SpendingScore'});
+
+% Data must be a numeric array to be used in the clustering algorithm
+arrayed_data = table2array(selected_data);