From 46fb1e1f1cb50d8323dd05a10a867dc003ceef21 Mon Sep 17 00:00:00 2001 From: gradedSystem Date: Thu, 7 Nov 2024 14:59:15 +0800 Subject: [PATCH] [fix-data][s] Fixing up the data issue format - #34 --- data/code-list.csv | 1 - scripts/prepare.py | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/data/code-list.csv b/data/code-list.csv index 5e8f3bdc9a..0d6ab7ed2a 100644 --- a/data/code-list.csv +++ b/data/code-list.csv @@ -61393,7 +61393,6 @@ x,ZA,NGQ,Port of Ngqura,Port of Ngqura,EC,XX,1----6--,2307,,3346S 02540E, ,ZW,VFA,Victoria Falls,Victoria Falls,,AI,---4----,9501,,, ,ZW,ZMZ,Zimbabwe,Zimbabwe,MV,RL,1-3-----,1401,,2016S 03055E, ,ZW,ZVS,Zvishavane,Zvishavane,MI,RL,--3-----,1207,,2020S 03002E, -Change,Country,Location,Name,NameWoDiacritics,Subdivision,Function,Status,Date,IATA,Coordinates,Remarks ,AO,FBY,Farta Bay,Farta Bay,,1-------,RQ,0901,,1237S 01312E, ,AO,FLT,Futila Terminal,Futila Terminal,,1-------,RQ,0901,,0527S 01211E, ,AO,GIM,Gimboa,Gimboa,,1-------,RL,1107,,0732S 01210E, diff --git a/scripts/prepare.py b/scripts/prepare.py index 2770741c86..822a54d76d 100644 --- a/scripts/prepare.py +++ b/scripts/prepare.py @@ -91,7 +91,10 @@ def process(extracted_files): codelist_df = pd.DataFrame(codelist_list) #)=pd.concat(codelist_list) codelist_df = codelist_df.reindex(columns=['Change', 'Country', 'Location', 'Name', 'NameWoDiacritics', 'Subdivision', 'Status', 'Function', 'Date', 'IATA', 'Coordinates', 'Remarks']) + # Keep only rows where 'Country' values are empty, 1 character, or exactly 2 characters + codelist_df = codelist_df[codelist_df['Country'].str.len().fillna(0).between(0, 2)] codelist_df.to_csv(f"data/code-list.csv", index=False) + alias_df.to_csv(f"data/alias.csv", index=False) print("Processed and saved UNLOCODE files") return