Skip to content

Commit

Permalink
Add ht schema generation workflow
Browse files: browse the repository at this point in the history
  • Loading branch information
mattsolo1 committed Sep 19, 2023
1 parent 68b9d29 commit 6a86005
Show file tree
Hide file tree
Showing 5 changed files with 768 additions and 83 deletions.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,338 @@
----------------------------------------
Global fields:
'freq_meta': array<dict<str, str>>
'freq_index_dict': dict<str, int32>
'faf_meta': array<dict<str, str>>
'faf_index_dict': dict<str, int32>
'freq_sample_count': array<int32>
'filtering_model': struct {
model_name: str,
score_name: str,
feature_medians: dict<tuple (
str
), struct {
a_index: int32,
n_alt_alleles: int32,
AS_pab_max: float64,
AS_MQRankSum: float64,
AS_SOR: float64,
AS_ReadPosRankSum: float64
}>,
variants_by_strata: dict<tuple (
str
), int64>,
features_importance: dict<str, float64>,
features: array<str>,
test_results: array<struct {
rf_prediction: str,
rf_label: str,
n: int32
}>,
rf_snv_cutoff: struct {
bin: float64,
min_score: float64
},
rf_indel_cutoff: struct {
bin: float64,
min_score: float64
},
inbreeding_cutoff: float64,
model_id: str
}
'tool_versions': struct {
dbsnp_version: str,
cadd_version: str,
revel_version: str,
splicaai_version: str,
primateai_version: str,
pangolin_version: str,
vrs_version: str
}
'vep_globals': struct {
vep_version: str,
vep_csq_header: str,
vep_help: str,
vep_config: str
}
'age_distribution': struct {
bin_edges: array<float64>,
bin_freq: array<int32>,
n_smaller: int32,
n_larger: int32
}
'age_index_dict': dict<str, int32>
'age_meta': array<dict<str, str>>
'grpmax_index_dict': dict<str, int32>
'grpmax_meta': array<dict<str, str>>
'README': dict<str, str>
'gnomad_qc_repo': str
'gnomad_methods_repo': str
----------------------------------------
Row fields:
'locus': locus<GRCh38>
'alleles': array<str>
'freq': array<struct {
AC: int32,
AF: float64,
AN: int32,
homozygote_count: int32
}>
'grpmax': array<struct {
AC: int32,
AF: float64,
AN: int32,
homozygote_count: int32,
grp: str,
faf95: float64
}>
'faf': array<struct {
faf95: float64,
faf99: float64
}>
'a_index': int32
'was_split': bool
'rsid': set<str>
'filters': set<str>
'info': struct {
QUALapprox: int64,
SB: array<int32>,
MQ: float64,
MQRankSum: float64,
VarDP: int32,
AS_ReadPosRankSum: float64,
AS_pab_max: float64,
AS_QD: float32,
AS_MQ: float64,
QD: float32,
AS_MQRankSum: float64,
FS: float64,
AS_FS: float64,
ReadPosRankSum: float64,
AS_QUALapprox: int64,
AS_SB_TABLE: array<int32>,
AS_VarDP: int32,
AS_SOR: float64,
SOR: float64,
singleton: bool,
transmitted_singleton: bool,
omni: bool,
mills: bool,
monoallelic: bool,
AS_VQSLOD: float64,
InbreedingCoeff: float64,
vrs: struct {
VRS_Allele_IDs: array<str>,
VRS_Starts: array<int32>,
VRS_Ends: array<int32>,
VRS_States: array<str>
}
}
'vep': struct {
allele_string: str,
end: int32,
id: str,
input: str,
intergenic_consequences: array<struct {
allele_num: int32,
consequence_terms: array<str>,
impact: str,
variant_allele: str
}>,
most_severe_consequence: str,
motif_feature_consequences: array<struct {
allele_num: int32,
consequence_terms: array<str>,
high_inf_pos: str,
impact: str,
motif_feature_id: str,
motif_name: str,
motif_pos: int32,
motif_score_change: float64,
transcription_factors: array<str>,
strand: int32,
variant_allele: str
}>,
regulatory_feature_consequences: array<struct {
allele_num: int32,
biotype: str,
consequence_terms: array<str>,
impact: str,
regulatory_feature_id: str,
variant_allele: str
}>,
seq_region_name: str,
start: int32,
strand: int32,
transcript_consequences: array<struct {
allele_num: int32,
amino_acids: str,
appris: str,
biotype: str,
canonical: int32,
ccds: str,
cdna_start: int32,
cdna_end: int32,
cds_end: int32,
cds_start: int32,
codons: str,
consequence_terms: array<str>,
distance: int32,
domains: array<struct {
db: str,
name: str
}>,
exon: str,
flags: str,
gene_id: str,
gene_pheno: int32,
gene_symbol: str,
gene_symbol_source: str,
hgnc_id: str,
hgvsc: str,
hgvsp: str,
hgvs_offset: int32,
impact: str,
intron: str,
lof: str,
lof_flags: str,
lof_filter: str,
lof_info: str,
mane_select: str,
mane_plus_clinical: str,
mirna: array<str>,
polyphen_prediction: str,
polyphen_score: float64,
protein_end: int32,
protein_start: int32,
protein_id: str,
sift_prediction: str,
sift_score: float64,
source: str,
strand: int32,
transcript_id: str,
tsl: int32,
uniprot_isoform: array<str>,
variant_allele: str
}>,
variant_class: str
}
'rf': struct {
rf_positive_label: bool,
rf_negative_label: bool,
rf_label: str,
rf_train: bool,
rf_tp_probability: float64
}
'region_flag': struct {
lcr: bool,
segdup: bool,
non_par: bool
}
'allele_info': struct {
variant_type: str,
allele_type: str,
n_alt_alleles: int32,
was_mixed: bool
}
'histograms': struct {
qual_hists: struct {
gq_hist_all: struct {
bin_edges: array<float64>,
bin_freq: array<int64>,
n_smaller: int64,
n_larger: int64
},
dp_hist_all: struct {
bin_edges: array<float64>,
bin_freq: array<int64>,
n_smaller: int64,
n_larger: int64
},
gq_hist_alt: struct {
bin_edges: array<float64>,
bin_freq: array<int64>,
n_smaller: int64,
n_larger: int64
},
dp_hist_alt: struct {
bin_edges: array<float64>,
bin_freq: array<int64>,
n_smaller: int64,
n_larger: int64
},
ab_hist_alt: struct {
bin_edges: array<float64>,
bin_freq: array<int64>,
n_smaller: int64,
n_larger: int64
}
},
raw_qual_hists: struct {
gq_hist_all: struct {
bin_edges: array<float64>,
bin_freq: array<int64>,
n_smaller: int64,
n_larger: int64
},
dp_hist_all: struct {
bin_edges: array<float64>,
bin_freq: array<int64>,
n_smaller: int64,
n_larger: int64
},
gq_hist_alt: struct {
bin_edges: array<float64>,
bin_freq: array<int64>,
n_smaller: int64,
n_larger: int64
},
dp_hist_alt: struct {
bin_edges: array<float64>,
bin_freq: array<int64>,
n_smaller: int64,
n_larger: int64
},
ab_hist_alt: struct {
bin_edges: array<float64>,
bin_freq: array<int64>,
n_smaller: int64,
n_larger: int64
}
},
age_hists: array<struct {
age_hist_hom: struct {
bin_edges: array<float64>,
bin_freq: array<int64>,
n_smaller: int64,
n_larger: int64
},
age_hist_ht: struct {
bin_edges: array<float64>,
bin_freq: array<int64>,
n_smaller: int64,
n_larger: int64
}
}>
}
'in_silico_predictors': struct {
cadd: struct {
phred: float32,
raw_score: float32,
has_duplicate: bool
},
revel: struct {
revel_score: float64,
has_duplicate: bool
},
splice_ai: struct {
splice_ai_score: float32,
splice_consequence: str,
has_duplicate: bool
},
pangolin: struct {
pangolin_score: float64
}
}
----------------------------------------
Key: ['locus', 'alleles']
----------------------------------------
Loading

0 comments on commit 6a86005

Please sign in to comment.