From f125681e424e09fc351a8b0d89b348e51f173b01 Mon Sep 17 00:00:00 2001 From: Julie Date: Sat, 1 Oct 2022 14:04:03 -0400 Subject: [PATCH] Dealing with long occurrences (i.e., skip them if they are longer than 90 days and write exception.token) --- doc/figures/plot_analyse_occurrence_all.py | 27 +++++++++++++-- src/analyse_occurrence.py | 39 ++++++++++++++++++++++ 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/doc/figures/plot_analyse_occurrence_all.py b/doc/figures/plot_analyse_occurrence_all.py index 889cb31..9fd72e8 100644 --- a/doc/figures/plot_analyse_occurrence_all.py +++ b/doc/figures/plot_analyse_occurrence_all.py @@ -48,6 +48,7 @@ results = {} nfiles = 0 +too_long = [] for zipfile in zipfiles: print("Read data from {}".format(zipfile)) @@ -67,8 +68,23 @@ # print("jsonfile = ",jsonfile) if len(jsonfile) != 1: - if len(jsonfile) == 0: - print("Event {} probably still processing. Skip for now.".format(str(unzippedfoldername))) + exceptionfile = glob.glob(str(unzippedfoldername)+'/exception.token') + if len(exceptionfile) == 1: + print("Occurrence {} not analysed because too long.".format(str(unzippedfoldername))) + ff = open(str(exceptionfile[0]), "r") + content = ff.read() + ff.close() + # analyse_occurrence: Occurrence 2c42d6b6-ae77-47c3-9fb5-0e4dde8e6719 (idx=212) not processed because it is too long (1151.0 days) + uuid = content.split(' ')[2] + idx = int(content.split('=')[1].split(')')[0]) + length_occurrence = float(content.split('(')[2].split(' ')[0]) + too_long.append({'uuid':uuid,'idx':idx,'length':length_occurrence}) + # remove unzipped files and folder if they were created here + if unpacked: + shutil.rmtree(unzippedfoldername) + continue + elif len(jsonfile) == 0: + print("Occurrence {} probably still processing. 
Skip for now.".format(str(unzippedfoldername))) # remove unzipped files and folder if they were created here if unpacked: shutil.rmtree(unzippedfoldername) @@ -182,13 +198,18 @@ np.sum(available_timesteps_n)+np.sum(missing_timesteps_n), np.sum(missing_timesteps_n)/(np.sum(available_timesteps_n)+np.sum(missing_timesteps_n))*100.)) print("\\item precipitation sum below 10~mm for {} of {} features".format(len(precip_small),len(features_idx))) -print("\\item precipitation sum above 1000~mm for {} of {} features \\\\(all have multi-year period specified in HFE database)\\\\".format(len(precip_large),len(features_idx))) +print("\\item precipitation sum above 1000~mm for {} of {} features \\\\".format(len(precip_large),len(features_idx))) for ii in precip_large: print(" {{\\scriptsize UUID: {} $\curvearrowright$ Start and end date = [{},{}]}}\\\\[-4pt]".format( #features_idx[ii], results[features_idx[ii]]['uuid'], results[features_idx[ii]]['results']['start_date_w_buffer'], results[features_idx[ii]]['results']['end_date_w_buffer'])) +print("\\item in total {} occurrences not analysed because too long\\\\".format(len(too_long))) +for itoo_long in too_long: + print(" {{\\scriptsize UUID: {} $\curvearrowright$ Length = {} [days]}}\\\\[-4pt]".format( + itoo_long['uuid'], + itoo_long['length'])) print("\\item no precipitation event found for {} of {} features\\\\".format(len(no_precip_event_found),len(features_idx))) for ii in no_precip_event_found: print(" {{\\scriptsize UUID: {} $\curvearrowright$ Start and end date = [{},{}]}}\\\\[-4pt]".format( diff --git a/src/analyse_occurrence.py b/src/analyse_occurrence.py index 706a8ab..3853825 100644 --- a/src/analyse_occurrence.py +++ b/src/analyse_occurrence.py @@ -172,6 +172,9 @@ def analyse_occurrence(ifeatures=None,tmpdir='/tmp/',bbox_buffer=0.5,dates_buffe # # all GEOMET # ifeatures = [2, 3, 4, 6, 38, 119, 123, 127, 137, 138, 139, 141, 142, 152, 220, 229, 367, 389, 405, 490, 510, 515, 516, 553, 560, 643, 838, 872, 876, 877, 
882, 884, 894, 899, 902, 903, 909, 911, 916, 917, 942, 956, 964, 970, 972, 974, 980, 981, 1032, 1037, 1039, 1046, 1085, 1106, 1116, 1117, 1118, 1141, 1149, 1155, 1159, 1170, 1173, 1180, 1184, 1201, 1202, 1236, 1243, 1263, 1274, 1310, 1311, 1312, 1313, 1314, 1315, 1317, 1332, 1338, 1345, 1346, 1361, 1366, 1445, 1455, 1481, 1493, 1506, 1518, 1529, 1536, 1538, 1547, 1548, 1549, 1550, 1555, 1562, 1591, 1613, 1625, 1639, 1645, 1662, 1680, 1682, 1730, 1731, 1732, 1735, 1742, 1763, 1765, 1778, 1780, 1781, 1797, 1800, 1841, 1843, 1846, 1853, 1864, 1889] + # # all too long + # ifeatures = [921, 926, 927, 931, 1083, 1869] + # -------------------- # Load HFE database (occurrences) # -------------------- @@ -230,6 +233,39 @@ def analyse_occurrence(ifeatures=None,tmpdir='/tmp/',bbox_buffer=0.5,dates_buffe feature['properties']['flood_cause'], )) + # -------------------- + # Make sure occurrence is not super long (> 90. days) which would take a very long time to process + # -------------------- + if not(feature['properties']['end_date'] is None): + length_occurrence = (end_date-start_date).days+(end_date-start_date).seconds/60./60./24. 
+ if (length_occurrence > 90.): + print("analyse_occurrence: Occurrence will NOT be analysed because it is TOO LONG:") + print(" >>> Length occurrence {} (idx={}): {} [days]".format( + feature['properties']['uuid'], + ifeature, + length_occurrence)) + result['png-ts'].append( [] ) + result['png'].append( [] ) + result['gif'].append( [] ) + result['legend'].append( [] ) + result['json'].append( [] ) + + # save a token file to note that this occurrence won't be processed + tokenfile = str(Path(tmpdir+'/analyse_occurrence_'+str(ifeature)+'/exception.token')) + + # make sure folder to store file exists; otherwise create + Path(tokenfile).parent.mkdir(parents=True, exist_ok=True) + + # write the reason for skipping (uuid, idx, length in days) into the token file + ff = open(tokenfile, "w") + ff.write("analyse_occurrence: Occurrence {} (idx={}) not processed because it is too long ({} days)".format( + feature['properties']['uuid'], + ifeature, + length_occurrence)) + ff.close() + + continue + # -------------------- # Determine bounding box # -------------------- @@ -685,3 +721,6 @@ def find_names_of_occurrences(event_feature,data_hfe_occur,locations): # for example, run for all Geomet features: # python analyse_occurrence.py --ifeatures "2, 3, 4, 6, 38, 119, 123, 127, 137, 138, 139, 141, 142, 152, 220, 229, 367, 389, 405, 490, 510, 515, 516, 553, 560, 643, 838, 872, 876, 877, 882, 884, 894, 899, 902, 903, 909, 911, 916, 917, 942, 956, 964, 970, 972, 974, 980, 981, 1032, 1037, 1039, 1046, 1085, 1106, 1116, 1117, 1118, 1141, 1149, 1155, 1159, 1170, 1173, 1180, 1184, 1201, 1202, 1236, 1243, 1263, 1274, 1310, 1311, 1312, 1313, 1314, 1315, 1317, 1332, 1338, 1345, 1346, 1361, 1366, 1445, 1455, 1481, 1493, 1506, 1518, 1529, 1536, 1538, 1547, 1548, 1549, 1550, 1555, 1562, 1591, 1613, 1625, 1639, 1645, 1662, 1680, 1682, 1730, 1731, 1732, 1735, 1742, 1763, 1765, 1778, 1780, 1781, 1797, 1800, 1841, 1843, 1846, 1853, 1864, 1889" --bbox_buffer 0.5 --dates_buffer 5.0,0.0 --tmpdir "/project/6070465/julemai/nrcan-hfe/data/output/" + + # for example, run for
all features that are too long: + # python analyse_occurrence.py --ifeatures "921, 926, 927, 931, 1083, 1869" --bbox_buffer 0.5 --dates_buffer 5.0,0.0 --tmpdir "/project/6070465/julemai/nrcan-hfe/data/output/"