From c05a84a90f7300052964063d2bd3f1b2947cc8fc Mon Sep 17 00:00:00 2001
From: Lanqing Yuan <yuanlq@uchicago.edu>
Date: Fri, 5 Apr 2024 16:41:56 -0500
Subject: [PATCH] get_available_runs

---
 saltax/match/utils.py | 100 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/saltax/match/utils.py b/saltax/match/utils.py
index 702990a..db2a89e 100644
--- a/saltax/match/utils.py
+++ b/saltax/match/utils.py
@@ -6,6 +6,8 @@
 from itertools import cycle
 import saltax
 from scipy.stats import binomtest
+import utilix
+from glob import glob
 
 ALL_CUTS = np.array([
             'cut_daq_veto',
@@ -98,6 +100,104 @@
             'cut_cs2_area_fraction_top',])
 
 
+def find_runs_with_rawdata(
+        rawdata_folders=[
+            '/project/lgrandi/yuanlq/salt/raw_records/',
+            '/scratch/midway2/yuanlq/salt/raw_records/',
+            '/scratch/midway3/yuanlq/salt/raw_records/'
+        ]
+    ):
+    # Find the files that correspond to strax data
+    files_found = []
+    for folder in rawdata_folders:
+        _files_found = glob(folder+'0*')
+        files_found += _files_found
+
+    # Find the runs that have standard raw_records available
+    runs = []
+    for f in files_found:
+        _f = f.split('/')[-1]
+        runid, datatype, shash = _f.split('-')
+        if datatype == "raw_records" and shash == "rfzvpzj4mf":
+            runs.append(runid)
+    runs = np.array(runs)
+    return runs
+
+
+def is_stored_dtypes(st, runid, dtypes):
+    """
+    Check if all dtypes are stored for a run.
+    :param st: saltax context
+    :param runid: runid
+    :param dtypes: list of dtypes
+    :return: True if all dtypes are stored, False otherwise
+    """
+    if not len(dtypes):
+        return True
+    for dtype in dtypes:
+        if not st.is_stored(runid, dtype):
+            return False
+    return True
+
+
+def get_available_runs(runs, st_salt, st_simu,
+                       salt_available=['peak_basics', 'peak_positions_mlp'],
+                       simu_available=['peak_basics', 'peak_positions_mlp']):
+    """
+    Print out available runs for both salt and simu modes.
+    :param runs: list of runs.
+    :param st_salt: saltax context for salt mode
+    :param st_simu: saltax context for simu mode
+    :param salt_available: list of available dtypes for salt mode
+    :param simu_available: list of available dtypes for simu mode
+    """
+    rundb = utilix.rundb.xent_collection()
+    # Find run modes and duration correspondingly
+    modes = []
+    durations = []
+    for run in runs:
+        query = {'number': int(run)}
+        doc = rundb.find_one(query)
+
+        # get mode
+        mode = doc['mode']
+        # duration
+        td = doc['end'] - doc['start']
+        td_min = int(td.total_seconds()/60)
+
+        modes.append(mode)
+        durations.append(td_min)
+    modes = np.array(modes)
+    durations = np.array(durations)
+
+    # build dictionaries for modes and runs
+    modes_dict = {}
+    for mode in np.unique(modes):
+        modes_dict[mode] = runs[modes == mode]
+    durations_dict = {}
+    for i,run in enumerate(runs):
+        durations_dict[run] = durations[i]
+
+    # Prepare data for tabulate
+    available_runs = []
+    table_data = []
+    for mode, runids in modes_dict.items():
+        for runid in runids:
+            if int(runid) in runs:
+                if (is_stored_dtypes(st_salt, runid, salt_available) and 
+                    is_stored_dtypes(st_simu, runid, simu_available)):
+                    duration = durations_dict.get(runid, 'N/A')  # Get duration or 'N/A' if not found
+                    table_data.append([mode, runid, duration])
+                    available_runs.append(runid)
+
+    # Print table using tabulate
+    print(tabulate(table_data, headers=["mode", "runid", "duration [min]"]))
+    print("=============================")
+    print("The runs below are available:")
+    print(available_runs)
+    print("=============================")
+
+
 def load_peaks(runs, st_salt, st_simu, 
                plugins=('peak_basics', 'peak_positions_mlp')):
     """