KMeans updated

ozlemkorpe · Aug 10, 2020 · 6d1fddc · 6d1fddc
1 parent b8ca2e0
commit 6d1fddc
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 1 deletion.
diff --git a/KMeans_Guide.asv b/KMeans_Guide.asv
@@ -0,0 +1,28 @@
+%%%------------- K-Means Clustering
+% Step 1: Choose the number k of clusters.
+
+% Step 2: Select k points at random as the initial centroids.
+
+% Step 3: Assign each data point to its nearest centroid, which yields k
+% clusters.
+
+% Step 4: Compute the new centroid of each cluster from the data points it
+% contains.
+
+% Step 5: Reassign each data point to the new closest centroid.
+
+% Choosing k: compare the within-cluster sum of squares (WCSS) across
+% candidate values of k ("The Elbow Method").
+
+% Import the dataset. fullfile builds the path with the platform's file
+% separator, so the script is not tied to Windows-style backslashes.
+data = readtable(fullfile('Datasets', 'Mall_Customers.csv'));
+
+% Count missing values per column
+missings = sum(ismissing(data));
+
+% Plot variables to check for outliers. Each plot gets its own figure;
+% otherwise the second plot call replaces the first one in the same axes
+% and IncomePlot is left pointing at a deleted line object.
+figure;
+IncomePlot = plot(data.AnnualIncome);
+figure;
+SpendingPlot = plot(data.SpendingScore);
+
+% Perform Feature Scaling (Standardization Method): z = (x - mean) / std
+stand_income = (data.AnnualIncome - mean(data.AnnualIncome)) ./ std(data.AnnualIncome);
+data.AnnualIncome = stand_income;
diff --git a/KMeans_Guide.m b/KMeans_Guide.m
@@ -13,5 +13,25 @@
 
 % Choosing the k value, search for WCSS. "The Elbow Method"
 
-
+% Import the dataset. fullfile builds the path with the platform's file
+% separator, so the script is not tied to Windows-style backslashes.
-data = readtable('Datasets\Mall_Customers.csv');
+data = readtable(fullfile('Datasets', 'Mall_Customers.csv'));
+
+% Count missing values per column
+missings = sum(ismissing(data));
+
+% Plot variables to check for outliers. Each plot gets its own figure;
+% otherwise the second plot call replaces the first one in the same axes
+% and IncomePlot is left pointing at a deleted line object.
+figure;
+IncomePlot = plot(data.AnnualIncome);
+figure;
+SpendingPlot = plot(data.SpendingScore);
+
+% Perform Feature Scaling (Standardization Method): z = (x - mean) / std
+stand_income = (data.AnnualIncome - mean(data.AnnualIncome)) ./ std(data.AnnualIncome);
+data.AnnualIncome = stand_income;
+
+stand_spending = (data.SpendingScore - mean(data.SpendingScore)) ./ std(data.SpendingScore);
+data.SpendingScore = stand_spending;
+
+% Select the clustering features by name rather than by position (4:5), so
+% the selection does not depend on the column order of the CSV.
+% NOTE(review): assumes columns 4 and 5 are AnnualIncome and SpendingScore - confirm.
+selected_data = data(:, {'AnnualIncome', 'SpendingScore'});
+
+% Data must be a numeric array to be used in the clustering algorithm
+arrayed_data = table2array(selected_data);