Skip to content

Commit

Permalink
Documentation, manual option for MAX_LOC_DMEG
Browse files Browse the repository at this point in the history
  • Loading branch information
JordanHoffmann3 committed Dec 6, 2023
1 parent e66dc37 commit 87f2184
Show file tree
Hide file tree
Showing 13 changed files with 123 additions and 102 deletions.
63 changes: 45 additions & 18 deletions zdm/MCMC.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
File: MCMC.py
Author: Jordan Hoffmann
Date: 28/09/23
Date: 06/12/23
Purpose:
Contains functions used for MCMC runs of the zdm code. MCMC_wrap.py is the
main function which does the calling and this holds functions which do the
Expand All @@ -23,22 +23,21 @@

#==============================================================================

def mcmc_likelihoods(outfile:str, walkers:int, steps:int, params, surveys, grids, ncpus=1):
posterior_sample = mcmc_runner(calc_log_posterior, outfile, params, surveys, grids, nwalkers=walkers, nsteps=steps, ncpus=ncpus)

posterior_dict = {}
for i,k in enumerate(params):
posterior_dict[k] = posterior_sample[:,:,i]

with open(outfile+'.pkl', 'wb') as fp:
pickle.dump(posterior_dict, fp, pickle.HIGHEST_PROTOCOL)
# np.save(outfile,posterior_sample)

return

#==============================================================================

def calc_log_posterior(param_vals, params, surveys, grids):
"""
Calculates the log posterior for a given set of parameters. Assumes uniform
priors between the minimum and maximum values provided in 'params'.
Inputs:
param_vals = Array of the parameter values for this step
params = Dictionary of the parameter names, min and max values
surveys = List of surveys being used
grids = List of grids corresponding to the surveys
Outputs:
llsum = Total log likelihood for param_vals which is equivalent
to log posterior (un-normalised) due to uniform priors
"""

t0 = time.time()
# Can use likelihoods instead of posteriors because we only use uniform priors which just changes normalisation of posterior
Expand Down Expand Up @@ -87,7 +86,25 @@ def calc_log_posterior(param_vals, params, surveys, grids):

#==============================================================================

def mcmc_runner(logpf, outfile, params, surveys, grids, nwalkers=10, nsteps=100, ncpus=1):
def mcmc_runner(logpf, outfile, params, surveys, grids, nwalkers=10, nsteps=100, nthreads=1):
"""
Handles the MCMC running.
Inputs:
logpf = Log posterior function handle
outfile = Name of the output file (excluding .h5 extension)
params = Dictionary of the parameter names, min and max values
surveys = List of surveys being used
grids = List of grids corresponding to the surveys
nwalkers = Number of walkers
nsteps = Number of steps per walker
nthreads = Number of threads to use for parallelised runs
Outputs:
posterior_sample = Final sample
outfile.h5 = HDF5 file containing the sampler
"""

ndim = len(params)
starting_guesses = []

Expand All @@ -102,7 +119,7 @@ def mcmc_runner(logpf, outfile, params, surveys, grids, nwalkers=10, nsteps=100,
backend.reset(nwalkers, ndim)

start = time.time()
with mp.Pool(ncpus) as pool:
with mp.Pool(nthreads) as pool:
sampler = emcee.EnsembleSampler(nwalkers, ndim, logpf, args=[params, surveys, grids], backend=backend, pool=pool)
sampler.run_mcmc(starting_guesses, nsteps, progress=True)
end = time.time()
Expand All @@ -115,6 +132,16 @@ def mcmc_runner(logpf, outfile, params, surveys, grids, nwalkers=10, nsteps=100,
#==============================================================================

def get_likelihood(grid, s):
"""
Returns the likelihood for the grid given the survey.
Inputs:
grid = Grid used
s = Survey to compare with the grid
Outputs:
llsum = Total loglikelihood for the grid
"""

if s.nD==1:
llsum1, lllist, expected = it.calc_likelihoods_1D(grid, s, psnr=True, dolist=1)
Expand Down
24 changes: 17 additions & 7 deletions zdm/MCMC_wrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,38 @@

parser = argparse.ArgumentParser()
parser.add_argument(dest='files', nargs='+', help="Survey file names")
parser.add_argument('-i','--initialise', default=None, type=str, help="Prefix used to initialise survey")
parser.add_argument('-i','--initialise', default=None, type=str, help="Save surveys and grids with Pickle")
parser.add_argument('-p','--pfile', default=None , type=str, help="File defining parameter ranges")
parser.add_argument('-o','--opfile', default=None, type=str, help="Output file for the data")
parser.add_argument('-w', '--walkers', default=20, type=int, help="Number of MCMC walkers")
parser.add_argument('-s', '--steps', default=100, type=int, help="Number of MCMC steps")
parser.add_argument('-n', '--ncpus', default=1, type=int, help="Number of CPU cores (used to determine number of parallel processes)")
parser.add_argument('-n', '--nthreads', default=1, type=int, help="Number of threads")
args = parser.parse_args()

# Check correct flags are specified
if args.pfile is None or args.opfile is None:
if not (args.pfile is None and args.opfile is None):
print("All flags (except -i optional) are required unless this is only for initialisation in which case only -i should be specified.")
print("-p and -o flags are required")
exit()

#==============================================================================

def main(args):
"""
Handles the setup for MCMC runs. This involves reading / creating the
surveys and grids, reading the parameters and prior ranges and then
beginning the MCMC run.
Inputs:
args = Command line parameters
Outputs:
None
"""

names=args.files
prefix=args.initialise


############## Initialise cosmology ##############
# Location for maximisation output
outdir='mcmc/'
Expand All @@ -55,8 +66,7 @@ def main(args):
zDMgrid, zvals,dmvals=get_zdm_grid(state,new=True,plot=False,method='analytic',save=True,datdir='MCMCData')

############## Initialise surveys ##############

if not os.path.exists('Pickle/'+prefix+'surveys.pkl'):
if prefix is None or not os.path.exists('Pickle/'+prefix+'surveys.pkl'):
# Initialise surveys
surveys = []
for name in names:
Expand Down Expand Up @@ -107,7 +117,7 @@ def main(args):
# Select from dictionary the necessary parameters to be changed
params = {k: mcmc_dict[k] for k in mcmc_dict['mcmc']['parameter_order']}

mcmc_likelihoods(outdir + args.opfile, args.walkers, args.steps, params, surveys, grids, ncpus=args.ncpus)
mcmc_runner(calc_log_posterior, outdir + args.opfile, params, surveys, grids, nwalkers=args.walkers, nsteps=args.steps, nthreads=args.nthreads)
else:
print("No parameter or output file provided. Assuming only initialising and no MCMC running is done.")

Expand Down
5 changes: 4 additions & 1 deletion zdm/craco/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,10 @@ def survey_and_grid(survey_name:str='CRAFT/CRACO_1_5000',
0=power-law, 1=gamma, 2=gamma+spline. Defaults to 0.
state_dict (dict, optional):
Used to init state instead of alpha_method, lum_func parameters
sdir (string, optional): Directory containing surveys
edir (string, optional):
Directory containing efficiency files if using FRB-specific responses
Raises:
IOError: [description]
Expand Down
2 changes: 1 addition & 1 deletion zdm/data/Surveys/DSA.ecsv
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# - {name: XRA, datatype: float64}
# - {name: Z, datatype: float64}
# meta: !!omap
# - {survey_data: '{"observing": {"DISCARD_Zs": true, "FBAR": 1405.0, "MAX_DM": 1500}, "telescope": {"BMETHOD": 0, "BTHRESH": 0.001, "DIAM": 4.65, "NBEAMS": 1,
# - {survey_data: '{"observing": {"MAX_LOC_DMEG": 0, "FBAR": 1405.0, "MAX_DM": 1500}, "telescope": {"BMETHOD": 0, "BTHRESH": 0.001, "DIAM": 4.65, "NBEAMS": 1,
# "NBINS": 10}}'}
# schema: astropy-2.0
TNS BW DM DMG FBAR FRES Gb Gl NREP SNR SNRTHRESH THRESH TRES WIDTH XDec XRA Z
Expand Down
2 changes: 1 addition & 1 deletion zdm/data/Surveys/FAST.ecsv
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# - {name: Z, datatype: string, subtype: 'float64[null]'}
# meta: !!omap
# - {survey_data: '{"observing": {"NORM_FRB": 4, "TOBS": 4.62, "MAX_DM": 5000}, "telescope": {"BEAM": "parkes_mb_log", "DIAM": 300.0, "NBEAMS": 19,
# "NBINS": 1}}'}
# "NBINS": 10}}'}
# schema: astropy-2.0
TNS BW DM DMG FBAR FRES Gb Gl NREP SNR SNRTHRESH THRESH TRES WIDTH XDec XRA Z
181017 500.0 1845.2 34.96 1250.0 0.122 -51.4 117.9 1 17.0 7.0 0.0146 0.196608 1.43 "" "" ""
Expand Down
2 changes: 1 addition & 1 deletion zdm/data/Surveys/FAST2.ecsv
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# - {name: Z, datatype: string, subtype: 'float64[null]'}
# meta: !!omap
# - {survey_data: '{"observing": {"NORM_FRB": 5, "TOBS": 2.59, "MAX_DM": 3700}, "telescope": {"BEAM": "parkes_mb_log", "DIAM": 300.0, "NBEAMS": 19,
# "NBINS": 1}}'}
# "NBINS": 10}}'}
# schema: astropy-2.0
TNS BW DM DMG FBAR FRES Gb Gl NREP SNR SNRTHRESH THRESH TRES WIDTH XDec XRA Z
210126 500.0 1990.4 724.0 1250.0 0.122 "" "" 1 13.60 7.0 0.0146 0.196608 10.8 "" "" ""
Expand Down
24 changes: 12 additions & 12 deletions zdm/iteration.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def maximise_likelihood(grid,survey):



def calc_likelihoods_1D(grid,survey,doplot=False,norm=True,psnr=False,Pn=True,dolist=0):
def calc_likelihoods_1D(grid,survey,doplot=False,norm=True,psnr=False,Pn=True,dolist=0,repeaters=False,singles=False):
""" Calculates 1D likelihoods using only observedDM values
Here, Zfrbs is a dummy variable allowing it to be treated like a 2D function
for purposes of calling.
Expand Down Expand Up @@ -202,11 +202,11 @@ def calc_likelihoods_1D(grid,survey,doplot=False,norm=True,psnr=False,Pn=True,do
idms2=idms1+1
dkdms=kdms-idms1

if grid.state.MW.uDMG == 0.0:
if grid.state.MW.sigmaDMG == 0.0:
# Linear interpolation
pvals=pdm[idms1]*(1.-dkdms) + pdm[idms2]*dkdms
else:
dm_weights, iweights = calc_DMG_weights(DMobs, survey.DMGs[survey.nozlist], grid.state.MW.uDMG, dmvals)
dm_weights, iweights = calc_DMG_weights(DMobs, survey.DMGs[survey.nozlist], grid.state.MW.sigmaDMG, dmvals)
pvals = np.zeros(len(idms1))
for i in range(len(idms1)):
pvals[i]=np.sum(pdm[iweights[i]]*dm_weights[i])
Expand Down Expand Up @@ -268,7 +268,7 @@ def calc_likelihoods_1D(grid,survey,doplot=False,norm=True,psnr=False,Pn=True,do

# get vector of thresholds as function of z and threshold/weight list
# note that the dimensions are, nthresh (weights), z, DM
if grid.state.MW.uDMG == 0.0:
if grid.state.MW.sigmaDMG == 0.0:
# Linear interpolation
Eths = grid.thresholds[:,:,idms1]*(1.-dkdms)+ grid.thresholds[:,:,idms2]*dkdms
else:
Expand Down Expand Up @@ -523,14 +523,14 @@ def calc_likelihoods_2D(grid,survey,

# Calculate probability

if grid.state.MW.uDMG == 0.0:
if grid.state.MW.sigmaDMG == 0.0:
# Linear interpolation
pvals = rates[izs1,idms1]*(1.-dkdms)*(1-dkzs)
pvals += rates[izs2,idms1]*(1.-dkdms)*dkzs
pvals += rates[izs1,idms2]*dkdms*(1-dkzs)
pvals += rates[izs2,idms2]*dkdms*dkzs
else:
dm_weights, iweights = calc_DMG_weights(DMobs, survey.DMGs[survey.zlist], grid.state.MW.uDMG, dmvals)
dm_weights, iweights = calc_DMG_weights(DMobs, survey.DMGs[survey.zlist], grid.state.MW.sigmaDMG, dmvals)
pvals = np.zeros(len(izs1))
for i in range(len(izs1)):
pvals[i] = np.sum(rates[izs1[i],iweights[i]] * dm_weights[i] * (1.-dkzs[i])
Expand Down Expand Up @@ -652,7 +652,7 @@ def calc_likelihoods_2D(grid,survey,
gamma=grid.state.energy.gamma
#Eths has dimensions of width likelihoods and nobs
# i.e. later, the loop over j,w uses the first index
if grid.state.MW.uDMG == 0.0:
if grid.state.MW.sigmaDMG == 0.0:
# Linear interpolation
Eths = grid.thresholds[:,izs1,idms1]*(1.-dkdms)*(1-dkzs)
Eths += grid.thresholds[:,izs2,idms1]*(1.-dkdms)*dkzs
Expand Down Expand Up @@ -765,14 +765,14 @@ def calc_likelihoods_2D(grid,survey,
elif dolist==5:
return llsum,lllist,expected,dolist5_return

def calc_DMG_weights(DMEGs, DMGs, uDMGs, dmvals, n_sig=3):
def calc_DMG_weights(DMEGs, DMGs, sigmaDMGs, dmvals, n_sig=3):
"""
Given an uncertainty on the DMG value, calculate the weights of DM values to integrate over
Inputs:
DMEGs = Extragalactic DMs
DMGs = Array of each DM_ISM value
uDMGs = Fractional uncertainty in DMG values
sigmaDMGs = Fractional uncertainty in DMG values
dmvals = Vector of DM values used
n_sig = number of standard deviations to integrate over
Expand All @@ -786,10 +786,10 @@ def calc_DMG_weights(DMEGs, DMGs, uDMGs, dmvals, n_sig=3):
# Loop through the DMG of each FRB in the survey and determine the weights
for i,DMG in enumerate(DMGs):
# Get absolute uncertainty in DMG
uDMG = DMG * uDMGs
sigmaDMG = DMG * sigmaDMGs

# Determine lower and upper DM values used
delta = n_sig*uDMG
delta = n_sig*sigmaDMG
min = DMEGs[i] - delta
# actual DMG can't be less than 0, so if delta is larger than DMG:
# DMEG = DM_tot - DMG - DMhalo
Expand All @@ -805,7 +805,7 @@ def calc_DMG_weights(DMEGs, DMGs, uDMGs, dmvals, n_sig=3):

# Get weights
x = dmvals[idxs]
weight = st.norm.pdf(x, loc=DMEGs[i], scale=uDMG)
weight = st.norm.pdf(x, loc=DMEGs[i], scale=sigmaDMG)
weight = weight / np.sum(weight) # Re-normalise
weights.append(weight)
iweights.append(idxs)
Expand Down
2 changes: 1 addition & 1 deletion zdm/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class MWParams(data_class.myDataClass):
metadata={'help': 'Assumed DM for the Galactic ISM',
'unit': 'pc cm$^{-3}$',
})
uDMG: float = field(
sigmaDMG: float = field(
default=0.5,
metadata={'help': 'Fractional uncertainty in DM from Galactic ISM',
'unit': '',
Expand Down
7 changes: 5 additions & 2 deletions zdm/real_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ def set_state(alpha_method=1, cosmo=Planck18):
def surveys_and_grids(init_state=None, alpha_method=1,
survey_names=None,
add_20220610A=False,
nz:int=2000, ndm:int=5600):
nz:int=2000, ndm:int=5600,
edir=''):
""" Load up a survey and grid for a real dataset
Args:
Expand All @@ -104,6 +105,8 @@ def surveys_and_grids(init_state=None, alpha_method=1,
Number of redshift bins
ndm (int, optional):
Number of DM bins
edir (string, optional):
Directory containing efficiency files if using FRB-specific responses
Raises:
IOError: [description]
Expand Down Expand Up @@ -143,7 +146,7 @@ def surveys_and_grids(init_state=None, alpha_method=1,
print(f"Initializing {survey_name}")
surveys.append(survey.load_survey(survey_name,
state, dmvals,
nbins=nbeam))
nbins=nbeam, edir=edir))
print("Initialised surveys")

# generates zdm grid
Expand Down
38 changes: 7 additions & 31 deletions zdm/scripts/run_mcmc.slurm
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH --job-name=CRAFT
#SBATCH --output=../mcmc/Hoffmann2023_CRAFT_no_191228.out
#SBATCH --job-name=DSA_FAST_CRAFT_4
#SBATCH --output=../mcmc/DSA_FAST_CRAFT_4.out
#SBATCH --ntasks=20
#SBATCH --time=24:00:00
#SBATCH --export=NONE
Expand All @@ -14,47 +14,23 @@ source $ZDM/.zdm_env/bin/activate

cd $ZDM/zdm

# outfile="Hoffmann2023_CRAFT_no_191228"
outfile="DSA"
outfile="DSA_FAST_CRAFT_4"
walkers=40
steps=1000
steps=2000

surveys="DSA3.ecsv" #"parkes_mb_class_I_and_II.ecsv" # DSA3.ecsv FAST.ecsv CRAFT_class_I_and_II.ecsv private_CRAFT_ICS_892.ecsv private_CRAFT_ICS_1300.ecsv private_CRAFT_ICS_1632.ecsv
prefix="DSAtest"
surveys="DSA.ecsv FAST.ecsv FAST2.ecsv CRAFT_class_I_and_II.ecsv private_CRAFT_ICS_892.ecsv private_CRAFT_ICS_1300.ecsv private_CRAFT_ICS_1632.ecsv parkes_mb_class_I_and_II.ecsv"

# Used when treating each FRB as an individual survey in HOFFMANN2023
# cd data/Surveys/
# surveys=$(ls -d -- Hoffmann2023/*[0-9]_exact.ecsv)
# prefix="Hoffmann2023_exact_no_191228"
# cd $ZDM/zdm

# cd data/Surveys/
# surveys=$(ls Hoffmann2023/*.ecsv)
# prefix="Hoffmann2023_exact"
# cd $ZDM/zdm

# surveys="Hoffmann2023_CRAFT/private_CRAFT_ICS_892.ecsv Hoffmann2023_CRAFT/private_CRAFT_ICS_1300.ecsv"
# prefix="Hoffmann2023_CRAFT"

# surveys="Hoffmann2023_CRAFT/private_CRAFT_ICS_892.ecsv Hoffmann2023_CRAFT/private_CRAFT_ICS_1300_no_191228.ecsv"
# prefix="Hoffmann2023_CRAFT_no_191228"

# surveys="FAST.ecsv"
# prefix="FAST"

# surveys="CRAFT_class_I_and_II.ecsv CRAFT_ICS.ecsv CRAFT_ICS_892.ecsv parkes_mb_class_I_and_II.ecsv"
# prefix="CRAFT"

# surveys="FAST.ecsv CRAFT_class_I_and_II.ecsv CRAFT_ICS.ecsv CRAFT_ICS_892.ecsv parkes_mb_class_I_and_II.ecsv"
# prefix="FASTnCRAFT"

# surveys="FAST.ecsv CRAFT_class_I_and_II.ecsv private_CRAFT_ICS_892.ecsv private_CRAFT_ICS_1300.ecsv private_CRAFT_ICS_1632.ecsv parkes_mb_class_I_and_II.ecsv"
# prefix="FASTnpCRAFT"

# surveys="FAST_no_tobs.ecsv CRAFT_class_I_and_II.ecsv private_CRAFT_ICS_892.ecsv private_CRAFT_ICS_1300.ecsv private_CRAFT_ICS_1632.ecsv parkes_mb_class_I_and_II.ecsv"
# prefix="FAST_no_tobs"

echo "Outfile: $outfile.out"
echo "Walkers: $walkers"
echo "Steps: $steps"

python MCMC_wrap.py $surveys -p tests/files/real_mini_mcmc2.json -o $outfile -w $walkers -s $steps -i $prefix -n 1
python MCMC_wrap.py $surveys -p tests/files/real_mini_mcmc2.json -o $outfile -w $walkers -s $steps -n 20
Loading

0 comments on commit 87f2184

Please sign in to comment.