ai-robotics-estonia
/
2023_Validating-applicability-of-an-artificial-intelligence-based-optimization-model
Public
generated from ai-robotics-estonia/_project_template_
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstatus.py
107 lines (88 loc) · 4.85 KB
/
status.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
import json
import matplotlib.pyplot as plt
from ..lib.decorators import print_error
# Numeric production metrics that are fed to KMeans (column order defines the
# column order of the reported cluster centers).
_FEATURE_COLUMNS = ['produced', 'off', 'short', 'long', 'working',
                    'availability', 'performance', 'quality', 'oee']

# Supported ``show_craft`` modes -> column plotted on the y axis
# (the x axis is always 'performance').
_PLOT_Y_COLUMNS = {'pq': 'quality', 'pa': 'availability'}


def _plot_workstations(frame, y_column):
    """Scatter-plot 'performance' against ``y_column``, one series per workstation."""
    # Clamp negatives to zero and nudge exact zeros to a tiny positive value so
    # the plotted values stay strictly positive (required should the axes ever
    # be switched to a logarithmic scale). Only the plotted copy is altered.
    plotted = frame[['performance', y_column]].clip(lower=0).replace(0, 1e-6)

    plt.figure(figsize=(10, 6))
    for workstation_name in frame['workstation'].unique():
        rows = plotted[frame['workstation'] == workstation_name]
        plt.scatter(rows['performance'], rows[y_column],
                    label=f'Workstation {workstation_name}')
    plt.xlabel('Performance')
    plt.ylabel(y_column.capitalize())
    plt.title('Clustering Analysis')
    plt.legend()
    plt.grid(True)
    plt.show()


def status_analyze(data, *, num_clusters=5, num_problematic_workstations=3,
                   show_craft=None):
    """Cluster production records with KMeans and summarise weak spots.

    Args:
        data: pandas DataFrame containing the columns 'timestamp',
            'workstation' and every column listed in ``_FEATURE_COLUMNS``.
            NOTE: the 'timestamp' column of ``data`` is converted to datetime
            in place (kept for backward compatibility with existing callers).
        num_clusters: number of KMeans clusters to fit.
        num_problematic_workstations: how many of the lowest-OEE cluster
            centers to report under "ProblematicWorkstations".
        show_craft: ``None`` for no plot, ``'pq'`` to plot performance vs
            quality, ``'pa'`` to plot performance vs availability.

    Returns:
        A dict with three JSON-compatible entries:
          * "ClusterCenters": one record per cluster holding the center value
            of each feature plus the cluster's most common 'workstation' and
            median 'timestamp' (as serialised by ``DataFrame.to_json``).
          * "Bottom5Workstations": ``[{'workstation': name}, ...]`` for the (up
            to) five workstations with the lowest mean OEE, worst first.
          * "ProblematicWorkstations": the ``num_problematic_workstations``
            cluster-center records with the lowest OEE.
        ``None`` is returned when the analysis fails; the exception is passed
        to ``print_error`` instead of being raised.

    Raises:
        ValueError: if ``show_craft`` is neither ``None`` nor a supported mode.
    """
    if show_craft is not None and show_craft not in _PLOT_Y_COLUMNS:
        raise ValueError(
            f"show_craft must be None or one of {sorted(_PLOT_Y_COLUMNS)}, "
            f"got {show_craft!r}"
        )

    try:
        data['timestamp'] = pd.to_datetime(data['timestamp'])

        # Cluster on the numeric metrics only; the workstation label is not a
        # feature.
        features = data[_FEATURE_COLUMNS]
        kmeans = KMeans(n_clusters=num_clusters, random_state=42)
        kmeans.fit(features)

        # Build a fresh frame instead of assigning into a slice of ``data``.
        clustered = data[['workstation', 'timestamp']].assign(
            cluster=kmeans.labels_)

        if show_craft is not None:
            _plot_workstations(data, _PLOT_Y_COLUMNS[show_craft])

        # Cluster centers (average feature values) in tabular form.
        cluster_centers = pd.DataFrame(kmeans.cluster_centers_,
                                       columns=_FEATURE_COLUMNS)

        # Label every center with its most frequent workstation and the median
        # timestamp of its members.
        representative_workstations = []
        representative_timestamps = []
        for cluster_id in range(num_clusters):
            members = clustered[clustered['cluster'] == cluster_id]
            representative_workstations.append(members['workstation'].mode()[0])
            representative_timestamps.append(members['timestamp'].median())
        cluster_centers['workstation'] = representative_workstations
        cluster_centers['timestamp'] = representative_timestamps

        # Workstations whose efficiency should be increased: rank workstations
        # by their own mean OEE and keep the five lowest. (Cluster indices are
        # not workstation identifiers, so they cannot be used to select
        # workstations.)
        mean_oee = data.groupby('workstation')['oee'].mean()
        bottom_5_workstations_arr = [
            {'workstation': name}
            for name in mean_oee.nsmallest(5).index.tolist()
        ]

        # Clusters with the lowest OEE point at potential issues.
        problematic_workstations = cluster_centers.sort_values(by='oee').head(
            num_problematic_workstations)

        return {
            "ClusterCenters": json.loads(
                cluster_centers.to_json(orient='records')),
            "Bottom5Workstations": bottom_5_workstations_arr,
            "ProblematicWorkstations": json.loads(
                problematic_workstations.to_json(orient='records')),
        }
    except Exception as e:
        # Best-effort API: report the failure and signal it with ``None``.
        print_error(e)
        return None