# config.yaml

# Top-level verbosity flag.
verbose: true
# Random seed.
seed: 94305

# Data simulation settings.
simulation:
  # Whether or not to run the simulation (can be skipped if previously done).
  run: true
  # Where to store the simulated data. When null (left blank), the default is
  # <output_folder>/generated_data/. If run is false, this is where the
  # simulated data will be sought.
  path: null
  splits:
    # How to split the data for training the model.
    ratios:
      train1: 0.8
      train2: 0.15
      # Set to 0 in order to not perform any validation after training.
      val: 0.05
    # NOTE(review): presumably the generations used for each split; val omits
    # generation 0, unlike train1/train2 - confirm this is intentional.
    gens:
      train1: [0, 2, 4, 6, 8, 12, 16, 24]
      train2: [0, 2, 4, 6, 8, 12, 16, 24]
      val: [2, 4, 6, 8, 12, 16, 24]
  # Which generations to simulate. The exact choice is not critical, but some
  # accuracy can be squeezed out if it better represents the query data.
  gens: [0, 2, 4, 6, 8, 12, 16, 24]
  # Set it lower if memory is an issue.
  r_admixed: 0.6
  # Removes the simulated data once it has been used (to reduce disk usage).
  rm_data: false

# Model settings.
model:
  # Model name for storing model + analysis.
  name: model
  # If null (left blank), the default is used; best, fast and large are the
  # alternative options.
  inference: null
  # Size of each base window in centiMorgans; use larger windows if SNP density
  # is lower (for example for genotype data).
  window_size_cM: 0.2
  # How many windows the smoothers see.
  smooth_size: 75
  # Context for each window will be context_ratio * window_size on each side.
  context_ratio: 0.5
  # Retrain the base models once smoother data has been obtained.
  retrain_base: true
  # Calibrate the output probabilities.
  calibrate: false
  # Number of CPUs to use; default is maximum.
  n_cores: 8

# Inference output settings.
inference:
  # Generate files for each individual that show the run-length encoding of
  # their ancestry segments.
  bed_file_output: false
  # Output ancestry inference for each marker of the query file.
  snp_level_inference: false
  # Visualize chromosome paintings (beta version).
  visualize_inference: false