Documentation, manual option for MAX_LOC_DMEG

FRBs · Dec 6, 2023 · 87f2184 · 87f2184
1 parent e66dc37
commit 87f2184
Show file tree

Hide file tree

Showing 13 changed files with 123 additions and 102 deletions.
diff --git a/zdm/MCMC.py b/zdm/MCMC.py
@@ -1,7 +1,7 @@
 """
 File: MCMC.py
 Author: Jordan Hoffmann
-Date: 28/09/23
+Date: 06/12/23
 Purpose: 
     Contains functions used for MCMC runs of the zdm code. MCMC_wrap.py is the 
     main function which does the calling and this holds functions which do the 
@@ -23,22 +23,21 @@
 
 #==============================================================================
 
-def mcmc_likelihoods(outfile:str, walkers:int, steps:int, params, surveys, grids, ncpus=1):
-    posterior_sample = mcmc_runner(calc_log_posterior, outfile, params, surveys, grids, nwalkers=walkers, nsteps=steps, ncpus=ncpus)
-
-    posterior_dict = {}
-    for i,k in enumerate(params):
-        posterior_dict[k] = posterior_sample[:,:,i]
-
-    with open(outfile+'.pkl', 'wb') as fp:
-        pickle.dump(posterior_dict, fp, pickle.HIGHEST_PROTOCOL)
-    # np.save(outfile,posterior_sample)
-
-    return
-
-#==============================================================================
-
 def calc_log_posterior(param_vals, params, surveys, grids):
+    """
+    Calculates the log posterior for a given set of parameters. Assumes uniform
+    priors between the minimum and maximum values provided in 'params'.
+
+    Inputs:
+        param_vals  =   Array of the parameter values for this step
+        params      =   Dictionary of the parameter names, min and max values
+        surveys     =   List of surveys being used
+        grids       =   List of grids corresponding to the surveys
+    
+    Outputs:
+        llsum       =   Total log likelihood for param_vals which is equivalent
+                        to log posterior (un-normalised) due to uniform priors
+    """
 
     t0 = time.time()
     # Can use likelihoods instead of posteriors because we only use uniform priors which just changes normalisation of posterior 
@@ -87,7 +86,25 @@ def calc_log_posterior(param_vals, params, surveys, grids):
 
 #==============================================================================
 
-def mcmc_runner(logpf, outfile, params, surveys, grids, nwalkers=10, nsteps=100, ncpus=1):
+def mcmc_runner(logpf, outfile, params, surveys, grids, nwalkers=10, nsteps=100, nthreads=1):
+    """
+    Handles the MCMC running.
+
+    Inputs:
+        logpf       =   Log posterior function handle
+        outfile     =   Name of the output file (excluding .h5 extension)
+        params      =   Dictionary of the parameter names, min and max values
+        surveys     =   List of surveys being used
+        grids       =   List of grids corresponding to the surveys
+        nwalkers    =   Number of walkers
+        nsteps      =   Number of steps per walker
+        nthreads    =   Number of threads to use for parallelised runs
+    
+    Outputs:
+        posterior_sample    =   Final sample
+        outfile.h5          =   HDF5 file containing the sampler
+    """
+
     ndim = len(params)
     starting_guesses = []
 
@@ -102,7 +119,7 @@ def mcmc_runner(logpf, outfile, params, surveys, grids, nwalkers=10, nsteps=100,
     backend.reset(nwalkers, ndim)
 
     start = time.time()
-    with mp.Pool(ncpus) as pool:
+    with mp.Pool(nthreads) as pool:
         sampler = emcee.EnsembleSampler(nwalkers, ndim, logpf, args=[params, surveys, grids], backend=backend, pool=pool)
         sampler.run_mcmc(starting_guesses, nsteps, progress=True)
     end = time.time()
@@ -115,6 +132,16 @@ def mcmc_runner(logpf, outfile, params, surveys, grids, nwalkers=10, nsteps=100,
 #==============================================================================
 
 def get_likelihood(grid, s):
+    """
+    Returns the likelihood for the grid given the survey.
+
+    Inputs:
+        grid    =   Grid used
+        s       =   Survey to compare with the grid
+    
+    Outputs:
+        llsum   =   Total loglikelihood for the grid
+    """
 
     if s.nD==1:
         llsum1, lllist, expected = it.calc_likelihoods_1D(grid, s, psnr=True, dolist=1)

diff --git a/zdm/MCMC_wrap.py b/zdm/MCMC_wrap.py
@@ -23,27 +23,38 @@
 
 parser = argparse.ArgumentParser()
 parser.add_argument(dest='files', nargs='+', help="Survey file names")
-parser.add_argument('-i','--initialise', default=None, type=str, help="Prefix used to initialise survey")
+parser.add_argument('-i','--initialise', default=None, type=str, help="Save surveys and grids with Pickle")
 parser.add_argument('-p','--pfile', default=None , type=str, help="File defining parameter ranges")
 parser.add_argument('-o','--opfile', default=None, type=str, help="Output file for the data")
 parser.add_argument('-w', '--walkers', default=20, type=int, help="Number of MCMC walkers")
 parser.add_argument('-s', '--steps', default=100, type=int, help="Number of MCMC steps")
-parser.add_argument('-n', '--ncpus', default=1, type=int, help="Number of CPU cores (used to determine number of parallel processes)")
+parser.add_argument('-n', '--nthreads', default=1, type=int, help="Number of threads")
 args = parser.parse_args()
 
 # Check correct flags are specified
 if args.pfile is None or args.opfile is None:
     if not (args.pfile is None and args.opfile is None):
-        print("All flags (except -i optional) are required unless this is only for initialisation in which case only -i should be specified.")
+        print("-p and -o flags are required")
         exit()
 
 #==============================================================================
 
 def main(args):
+    """
+    Handles the setup for MCMC runs. This involves reading / creating the
+    surveys and grids, reading the parameters and prior ranges and then 
+    beginning the MCMC run.
+
+    Inputs:
+        args = Command line parameters
+    
+    Outputs:
+        None
+    """
+
     names=args.files
     prefix=args.initialise
 
-
     ############## Initialise cosmology ##############
     # Location for maximisation output
     outdir='mcmc/'
@@ -55,8 +66,7 @@ def main(args):
     zDMgrid, zvals,dmvals=get_zdm_grid(state,new=True,plot=False,method='analytic',save=True,datdir='MCMCData')
 
     ############## Initialise surveys ##############
-
-    if not os.path.exists('Pickle/'+prefix+'surveys.pkl'):
+    if prefix is None or not os.path.exists('Pickle/'+prefix+'surveys.pkl'):
         # Initialise surveys
         surveys = []
         for name in names:
@@ -107,7 +117,7 @@ def main(args):
         # Select from dictionary the necessary parameters to be changed
         params = {k: mcmc_dict[k] for k in mcmc_dict['mcmc']['parameter_order']}
 
-        mcmc_likelihoods(outdir + args.opfile, args.walkers, args.steps, params, surveys, grids, ncpus=args.ncpus)
+        mcmc_runner(calc_log_posterior, outdir + args.opfile, params, surveys, grids, nwalkers=args.walkers, nsteps=args.steps, nthreads=args.nthreads)
     else:
         print("No parameter or output file provided. Assuming only initialising and no MCMC running is done.")
 

diff --git a/zdm/craco/loading.py b/zdm/craco/loading.py
@@ -108,7 +108,10 @@ def survey_and_grid(survey_name:str='CRAFT/CRACO_1_5000',
             0=power-law, 1=gamma, 2=gamma+spline.  Defaults to 0.
         state_dict (dict, optional):
             Used to init state instead of alpha_method, lum_func parameters
-
+        sdir (string, optional): Directory containing surveys
+        edir (string, optional): 
+            Directory containing efficiency files if using FRB-specific responses
+            
     Raises:
         IOError: [description]
 

diff --git a/zdm/data/Surveys/DSA.ecsv b/zdm/data/Surveys/DSA.ecsv
@@ -19,7 +19,7 @@
 # - {name: XRA, datatype: float64}
 # - {name: Z, datatype: float64}
 # meta: !!omap
-# - {survey_data: '{"observing": {"DISCARD_Zs": true, "FBAR": 1405.0, "MAX_DM": 1500}, "telescope": {"BMETHOD": 0, "BTHRESH": 0.001, "DIAM": 4.65, "NBEAMS": 1,
+# - {survey_data: '{"observing": {"MAX_LOC_DMEG": 0, "FBAR": 1405.0, "MAX_DM": 1500}, "telescope": {"BMETHOD": 0, "BTHRESH": 0.001, "DIAM": 4.65, "NBEAMS": 1,
 #     "NBINS": 10}}'}
 # schema: astropy-2.0
 TNS BW DM DMG FBAR FRES Gb Gl NREP SNR SNRTHRESH THRESH TRES WIDTH XDec XRA Z

diff --git a/zdm/data/Surveys/FAST.ecsv b/zdm/data/Surveys/FAST.ecsv
@@ -20,7 +20,7 @@
 # - {name: Z, datatype: string, subtype: 'float64[null]'}
 # meta: !!omap
 # - {survey_data: '{"observing": {"NORM_FRB": 4, "TOBS": 4.62, "MAX_DM": 5000}, "telescope": {"BEAM": "parkes_mb_log", "DIAM": 300.0, "NBEAMS": 19,
-#     "NBINS": 1}}'}
+#     "NBINS": 10}}'}
 # schema: astropy-2.0
 TNS BW DM DMG FBAR FRES Gb Gl NREP SNR SNRTHRESH THRESH TRES WIDTH XDec XRA Z
 181017 500.0 1845.2 34.96 1250.0 0.122 -51.4 117.9 1 17.0 7.0 0.0146 0.196608 1.43 "" "" ""

diff --git a/zdm/data/Surveys/FAST2.ecsv b/zdm/data/Surveys/FAST2.ecsv
@@ -20,7 +20,7 @@
 # - {name: Z, datatype: string, subtype: 'float64[null]'}
 # meta: !!omap
 # - {survey_data: '{"observing": {"NORM_FRB": 5, "TOBS": 2.59, "MAX_DM": 3700}, "telescope": {"BEAM": "parkes_mb_log", "DIAM": 300.0, "NBEAMS": 19,
-#     "NBINS": 1}}'}
+#     "NBINS": 10}}'}
 # schema: astropy-2.0
 TNS BW DM DMG FBAR FRES Gb Gl NREP SNR SNRTHRESH THRESH TRES WIDTH XDec XRA Z
 210126 500.0 1990.4 724.0 1250.0 0.122 "" "" 1 13.60 7.0 0.0146 0.196608 10.8 "" "" ""

diff --git a/zdm/iteration.py b/zdm/iteration.py
@@ -168,7 +168,7 @@ def maximise_likelihood(grid,survey):
 
 
 
-def calc_likelihoods_1D(grid,survey,doplot=False,norm=True,psnr=False,Pn=True,dolist=0):
+def calc_likelihoods_1D(grid,survey,doplot=False,norm=True,psnr=False,Pn=True,dolist=0,repeaters=False,singles=False):
     """ Calculates 1D likelihoods using only observedDM values
     Here, Zfrbs is a dummy variable allowing it to be treated like a 2D function
     for purposes of calling.
@@ -202,11 +202,11 @@ def calc_likelihoods_1D(grid,survey,doplot=False,norm=True,psnr=False,Pn=True,do
     idms2=idms1+1
     dkdms=kdms-idms1
 
-    if grid.state.MW.uDMG == 0.0:
+    if grid.state.MW.sigmaDMG == 0.0:
         # Linear interpolation
         pvals=pdm[idms1]*(1.-dkdms) + pdm[idms2]*dkdms
     else:
-        dm_weights, iweights = calc_DMG_weights(DMobs, survey.DMGs[survey.nozlist], grid.state.MW.uDMG, dmvals)
+        dm_weights, iweights = calc_DMG_weights(DMobs, survey.DMGs[survey.nozlist], grid.state.MW.sigmaDMG, dmvals)
         pvals = np.zeros(len(idms1))
         for i in range(len(idms1)):
             pvals[i]=np.sum(pdm[iweights[i]]*dm_weights[i])
@@ -268,7 +268,7 @@ def calc_likelihoods_1D(grid,survey,doplot=False,norm=True,psnr=False,Pn=True,do
 
         # get vector of thresholds as function of z and threshold/weight list
         # note that the dimensions are, nthresh (weights), z, DM
-        if grid.state.MW.uDMG == 0.0:
+        if grid.state.MW.sigmaDMG == 0.0:
             # Linear interpolation
             Eths = grid.thresholds[:,:,idms1]*(1.-dkdms)+ grid.thresholds[:,:,idms2]*dkdms
         else:
@@ -523,14 +523,14 @@ def calc_likelihoods_2D(grid,survey,
 
     # Calculate probability
 
-    if grid.state.MW.uDMG == 0.0:
+    if grid.state.MW.sigmaDMG == 0.0:
         # Linear interpolation
         pvals = rates[izs1,idms1]*(1.-dkdms)*(1-dkzs)
         pvals += rates[izs2,idms1]*(1.-dkdms)*dkzs
         pvals += rates[izs1,idms2]*dkdms*(1-dkzs)
         pvals += rates[izs2,idms2]*dkdms*dkzs
     else:
-        dm_weights, iweights = calc_DMG_weights(DMobs, survey.DMGs[survey.zlist], grid.state.MW.uDMG, dmvals)
+        dm_weights, iweights = calc_DMG_weights(DMobs, survey.DMGs[survey.zlist], grid.state.MW.sigmaDMG, dmvals)
         pvals = np.zeros(len(izs1))
         for i in range(len(izs1)):
             pvals[i] = np.sum(rates[izs1[i],iweights[i]] * dm_weights[i] * (1.-dkzs[i]) 
@@ -652,7 +652,7 @@ def calc_likelihoods_2D(grid,survey,
         gamma=grid.state.energy.gamma
         #Eths has dimensions of width likelihoods and nobs
         # i.e. later, the loop over j,w uses the first index
-        if grid.state.MW.uDMG == 0.0:
+        if grid.state.MW.sigmaDMG == 0.0:
             # Linear interpolation
             Eths = grid.thresholds[:,izs1,idms1]*(1.-dkdms)*(1-dkzs)
             Eths += grid.thresholds[:,izs2,idms1]*(1.-dkdms)*dkzs
@@ -765,14 +765,14 @@ def calc_likelihoods_2D(grid,survey,
     elif dolist==5:
         return llsum,lllist,expected,dolist5_return
 
-def calc_DMG_weights(DMEGs, DMGs, uDMGs, dmvals, n_sig=3):
+def calc_DMG_weights(DMEGs, DMGs, sigmaDMGs, dmvals, n_sig=3):
     """
     Given an uncertainty on the DMG value, calculate the weights of DM values to integrate over
 
     Inputs:
         DMEGs = Extragalactic DMs
         DMGs = Array of each DM_ISM value
-        uDMGs = Fractional uncertainty in DMG values
+        sigmaDMGs = Fractional uncertainty in DMG values
         dmvals = Vector of DM values used
         n_sig = number of standard deviations to integrate over
 
@@ -786,10 +786,10 @@ def calc_DMG_weights(DMEGs, DMGs, uDMGs, dmvals, n_sig=3):
     # Loop through the DMG of each FRB in the survey and determine the weights
     for i,DMG in enumerate(DMGs):
         # Get absolute uncertainty in DMG
-        uDMG = DMG * uDMGs
+        sigmaDMG = DMG * sigmaDMGs
 
         # Determine lower and upper DM values used
-        delta = n_sig*uDMG
+        delta = n_sig*sigmaDMG
         min = DMEGs[i] - delta
         # actual DMG can't be less than 0, so if delta is larger than DMG:
         # DMEG = DM_tot - DMG - DMhalo
@@ -805,7 +805,7 @@ def calc_DMG_weights(DMEGs, DMGs, uDMGs, dmvals, n_sig=3):
 
         # Get weights
         x = dmvals[idxs]
-        weight = st.norm.pdf(x, loc=DMEGs[i], scale=uDMG)
+        weight = st.norm.pdf(x, loc=DMEGs[i], scale=sigmaDMG)
         weight = weight / np.sum(weight)    # Re-normalise
         weights.append(weight)
         iweights.append(idxs)

diff --git a/zdm/parameters.py b/zdm/parameters.py
@@ -150,7 +150,7 @@ class MWParams(data_class.myDataClass):
         metadata={'help': 'Assumed DM for the Galactic ISM',
                   'unit': 'pc cm$^{-3}$',
         })
-    uDMG: float = field(
+    sigmaDMG: float = field(
         default=0.5,
         metadata={'help': 'Fractional uncertainty in DM from Galactic ISM',
                   'unit': '',

diff --git a/zdm/real_loading.py b/zdm/real_loading.py
@@ -87,7 +87,8 @@ def set_state(alpha_method=1, cosmo=Planck18):
 def surveys_and_grids(init_state=None, alpha_method=1, 
                       survey_names=None,
                       add_20220610A=False,
-                      nz:int=2000, ndm:int=5600): 
+                      nz:int=2000, ndm:int=5600,
+                      edir=''): 
     """ Load up a survey and grid for a real dataset
 
     Args:
@@ -104,6 +105,8 @@ def surveys_and_grids(init_state=None, alpha_method=1,
             Number of redshift bins
         ndm (int, optional):
             Number of DM bins
+        edir (string, optional):
+            Directory containing efficiency files if using FRB-specific responses
 
     Raises:
         IOError: [description]
@@ -143,7 +146,7 @@ def surveys_and_grids(init_state=None, alpha_method=1,
         print(f"Initializing {survey_name}")
         surveys.append(survey.load_survey(survey_name, 
                                           state, dmvals,
-                                          nbins=nbeam))
+                                          nbins=nbeam, edir=edir))
     print("Initialised surveys")
 
     # generates zdm grid

diff --git a/zdm/scripts/run_mcmc.slurm b/zdm/scripts/run_mcmc.slurm
@@ -1,6 +1,6 @@
 #!/bin/bash
-#SBATCH --job-name=CRAFT
-#SBATCH --output=../mcmc/Hoffmann2023_CRAFT_no_191228.out
+#SBATCH --job-name=DSA_FAST_CRAFT_4
+#SBATCH --output=../mcmc/DSA_FAST_CRAFT_4.out
 #SBATCH --ntasks=20
 #SBATCH --time=24:00:00
 #SBATCH --export=NONE
@@ -14,47 +14,23 @@ source $ZDM/.zdm_env/bin/activate
 
 cd $ZDM/zdm
 
-# outfile="Hoffmann2023_CRAFT_no_191228"
-outfile="DSA"
+outfile="DSA_FAST_CRAFT_4"
 walkers=40
-steps=1000
+steps=2000
 
-surveys="DSA3.ecsv" #"parkes_mb_class_I_and_II.ecsv" # DSA3.ecsv FAST.ecsv CRAFT_class_I_and_II.ecsv private_CRAFT_ICS_892.ecsv private_CRAFT_ICS_1300.ecsv private_CRAFT_ICS_1632.ecsv 
-prefix="DSAtest"
+surveys="DSA.ecsv FAST.ecsv FAST2.ecsv CRAFT_class_I_and_II.ecsv private_CRAFT_ICS_892.ecsv private_CRAFT_ICS_1300.ecsv private_CRAFT_ICS_1632.ecsv parkes_mb_class_I_and_II.ecsv"
 
+# Used when treating each FRB as an individual survey in HOFFMANN2023
 # cd data/Surveys/
 # surveys=$(ls -d -- Hoffmann2023/*[0-9]_exact.ecsv)
-# prefix="Hoffmann2023_exact_no_191228"
 # cd $ZDM/zdm
 
 # cd data/Surveys/
 # surveys=$(ls Hoffmann2023/*.ecsv)
-# prefix="Hoffmann2023_exact"
 # cd $ZDM/zdm
 
-# surveys="Hoffmann2023_CRAFT/private_CRAFT_ICS_892.ecsv Hoffmann2023_CRAFT/private_CRAFT_ICS_1300.ecsv"
-# prefix="Hoffmann2023_CRAFT"
-
-# surveys="Hoffmann2023_CRAFT/private_CRAFT_ICS_892.ecsv Hoffmann2023_CRAFT/private_CRAFT_ICS_1300_no_191228.ecsv"
-# prefix="Hoffmann2023_CRAFT_no_191228"
-
-# surveys="FAST.ecsv"
-# prefix="FAST"
-
-# surveys="CRAFT_class_I_and_II.ecsv CRAFT_ICS.ecsv CRAFT_ICS_892.ecsv parkes_mb_class_I_and_II.ecsv"
-# prefix="CRAFT"
-
-# surveys="FAST.ecsv CRAFT_class_I_and_II.ecsv CRAFT_ICS.ecsv CRAFT_ICS_892.ecsv parkes_mb_class_I_and_II.ecsv"
-# prefix="FASTnCRAFT"
-
-# surveys="FAST.ecsv CRAFT_class_I_and_II.ecsv private_CRAFT_ICS_892.ecsv private_CRAFT_ICS_1300.ecsv private_CRAFT_ICS_1632.ecsv parkes_mb_class_I_and_II.ecsv"
-# prefix="FASTnpCRAFT"
-
-# surveys="FAST_no_tobs.ecsv CRAFT_class_I_and_II.ecsv private_CRAFT_ICS_892.ecsv private_CRAFT_ICS_1300.ecsv private_CRAFT_ICS_1632.ecsv parkes_mb_class_I_and_II.ecsv"
-# prefix="FAST_no_tobs"
-
 echo "Outfile: $outfile.out"
 echo "Walkers: $walkers"
 echo "Steps: $steps"
 
-python MCMC_wrap.py $surveys -p tests/files/real_mini_mcmc2.json -o $outfile -w $walkers -s $steps -i $prefix -n 1
+python MCMC_wrap.py $surveys -p tests/files/real_mini_mcmc2.json -o $outfile -w $walkers -s $steps -n 20