diff --git a/.gitignore b/.gitignore index b5a760b..721f9d0 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,13 @@ singularity_cache/ # ignore large motif mapping files *motif_mappings*.bed +# ignore icres files (too large for repo) +*icres*.bed + +# ignore iCREs output files (confidentiality until publication) +example/outputs_icres/* +!example/outputs_icres/.gitkeep + # ignore nf-test executable nf-test @@ -18,8 +25,8 @@ nf-test tests/outputs/ # ignore SLURM output and error files -slurm.*.out -slurm.*.err +slurm*.out +slurm*.err # ignore jupyter notebook checkpoints .ipynb_checkpoints/ diff --git a/README.md b/README.md index 188b2ff..722c6ea 100644 --- a/README.md +++ b/README.md @@ -13,12 +13,10 @@ MINI-AC uses a dual license to offer the distribution of the software under a pr Currently, two species are supported by MINI-AC: *Arabidopsis thaliana* and two maize genome versions (B73 RefGen_v4 and B73 RefGen_v5). Additionally, it can be run on two different modes depending on the non-coding genomic space considered for motif mapping: * **genome-wide**: strategy where the whole non-coding genome is considered for motif mappings. It captures all the ACRs of the input dataset for the GRN prediction, which is adviced when working with species with long intergenic regions and distal regulatory elements, like maize for example. -* **locus-based**: strategy where the neighboring sequences within a pre-defined window of each locus, and introns are considered for motif mapping. It only captures the proximal ACRs of the input dataset within the pre-defined window, which can lead to missing distal ACRs in species with long intergenic regions and distal regulatory elements. However, it has the advantage of having a higher density of TFBS, which are mostly located close to the genes. 
The locus-based mode uses a "medium" non-coding genomic space, which corresponds, for each locus in the genome, to the 5kb upstream of the translation start site, the 1kb downstream of the translation end site, and the introns. However, for maize (but not for Arabidopsis; see publication), we generated two additional motif mapping files for the locus-based mode, that cover "large" (15kb upstream of the translation start site, the 2.5kb downstream of the translation end site, and the introns), and "small" (1kb upstream of the translation start site, the 1kb downstream of the translation end site, and the introns) non-coding genomic spaces. To use these files, check the instructions [here](docs/configuration_pipeline.md). - +* **locus-based**: strategy where the neighboring sequences within a pre-defined window of each locus, and introns are considered for motif mapping. It only captures the proximal ACRs of the input dataset within the pre-defined window, which can lead to missing distal ACRs in species with long intergenic regions and distal regulatory elements. However, it has the advantage of having a higher density of TFBS, which are mostly located close to the genes. The locus-based mode uses a "medium" non-coding genomic space, which corresponds, for each locus in the genome, to the 5kb upstream of the translation start site, the 1kb downstream of the translation end site, and the introns. However, for maize (but not for Arabidopsis; see publication), we generated two additional motif mapping files for the locus-based mode, that cover "large" (15kb upstream of the translation start site, the 2.5kb downstream of the translation end site, and the introns), and "small" (1kb upstream of the translation start site, the 1kb downstream of the translation end site, and the introns) non-coding genomic spaces. To use these files, check the instructions [here](docs/pipeline_configuration.md). 
A detailed overview of the necessary input files and expected output files can be found in this [example](example), done on **maize V4 with the genome-wide mode**, and using as input a single-cell-derived ACR dataset of mesophyll and bundle sheath. - ## **Inputs** * **MINI-AC mode**: genome-wide or locus-based. * **Species**: Arabidopsis or maize (maize genome version 4 or 5). @@ -63,8 +61,7 @@ NOTE: MINI-AC was developed using the following versions: Nextflow version 21.10 ## Usage - -Define the paths with the input files and the desired parameters setting in the [configuration file](docs/configuration_pipeline.md), and run it executing the following Nextflow command: +Define the paths with the input files and the desired parameters setting in the [configuration file](docs/pipeline_configuration.md), and run it executing the following Nextflow command: ```shell nextflow -C mini_ac.config run mini_ac.nf --mode --species @@ -72,6 +69,23 @@ nextflow -C mini_ac.config run mini_ac.nf --mode --spe Having problems running MINI-AC? Check the [FAQ](docs/FAQ.md). +## iCREs-based MINI-AC [NOT AVAILABLE UNTIL PUBLICATION] + +Given the amount of resources available to profile regulatory DNA in maize, we curated a collection of integrated cis-regulatory elements (iCREs) by combining and comparing different CRE-profiling methods (details to be published). + +We implemented a new framework in which it is possible to run MINI-AC given a list of maize genes. It works by retrieving the genomic coordinates of the iCREs associated with genes of interest, and submitting them to motif enrichment and GRN inference using the genome-wide mode of MINI-AC. iCREs-based MINI-AC can only be run for maize, and not for Arabidopsis. In addition, we offer different sets of iCREs that are used in the run: the "maxF1" (`maxf1`) set or the "all" (`all`) set. 
The first uses a set of putative CREs that is smaller but more precise (fewer false positives), while the second uses a more comprehensive and complete collection of maize putative CREs. + +To download files with the genomic coordinates of the iCREs, the following commands should be executed from the **top-level directory of the repository**: + +```shell +NOT AVAILABLE UNTIL PUBLICATION +``` + +To run iCREs-based MINI-AC, the [configuration file](./mini_ac_icres.config) should be prepared as explained [here](./docs/pipeline_configuration.md). Only two parameters change in comparison to the regular MINI-AC runs. Instead of providing a BED file with ACR genomic coordinates, a list of gene IDs from the maize genome version V4 or V5 should be provided, as exemplified [here](./example/inputs/gene_set_files/UP_gene_set.txt). In addition, an iCREs set should be specified (`maxf1` or `all`). Next, the following Nextflow command should be executed (shown here for the `maxf1` set and maize genome version 4): + +```shell +nextflow -C mini_ac_icres.config run mini_ac_icres.nf --icres_set maxf1 --species maize_v4 +``` ## Support @@ -81,7 +95,7 @@ Should you encounter a bug or have any questions or suggestions, please [open an When publishing results generated using MINI-AC, please cite: -Manosalva Pérez, Nicolás, Camilla Ferrari, Julia Engelhorn, Thomas Depuydt, Hilde Nelissen, Thomas Hartwig, and Klaas Vandepoele. “MINI-AC: Inference of Plant Gene Regulatory Networks Using Bulk or Single-Cell Accessible Chromatin Profiles.” The Plant Journal. https://doi.org/10.1111/tpj.16483. +Nicolás Manosalva Pérez, Camilla Ferrari, Julia Engelhorn, Thomas Depuydt, Hilde Nelissen, Thomas Hartwig, and Klaas Vandepoele. “MINI-AC: Inference of Plant Gene Regulatory Networks Using Bulk or Single-Cell Accessible Chromatin Profiles.” The Plant Journal 117, no. 1 (2024): 280–301. https://doi.org/10.1111/tpj.16483. 
## Contact diff --git a/bin/geneList2iCREs.py b/bin/geneList2iCREs.py new file mode 100644 index 0000000..8ae8e56 --- /dev/null +++ b/bin/geneList2iCREs.py @@ -0,0 +1,51 @@ +# %% +import argparse + +def parseArgs(): + + parser = argparse.ArgumentParser(prog = 'Script to get a BED file with iCREs ' + \ + 'coordinates given a list of genes', + conflict_handler='resolve') + + parser.add_argument('annotated_icres', type = str, + help = '', + metavar = 'BED file with 4th column being ' +\ + 'an annotated gene ID') + + parser.add_argument('gene_list', type = str, + help = '', + metavar = 'One column file containing gene IDs '+ \ + 'of interest') + + parser.add_argument('bed_of_genes_icres', type = str, + help = '', + metavar = 'Output BED file with coordinates '+\ + 'of iCREs associated with genes of interest') + + args = parser.parse_args() + + return args + +args = parseArgs() + +annot_icres = args.annotated_icres +genes_oi_file = args.gene_list +output_file = args.bed_of_genes_icres + +# %% +genes_oi = set() + +with open(genes_oi_file, "r") as fin: + for line in fin: + rec = line.strip().split("\t") + gene_id = rec[0] + genes_oi.add(gene_id) + +with open(output_file, "w") as fout: + with open(annot_icres, "r") as fin: + for line in fin: + rec = line.strip().split("\t") + gene_id = rec[3] + if gene_id in genes_oi: + fout.write("\t".join(rec[0:3])) + fout.write("\n") \ No newline at end of file diff --git a/data/icres/.gitkeep b/data/icres/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/FAQ.md b/docs/FAQ.md index d271012..23300af 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -2,7 +2,7 @@ ## Q: MINI-AC failed, how can I fix it? A: -* Check the [config file](/docs/configuration_pipeline.md): +* Check the [config file](/docs/pipeline_configuration.md): * Did you specify the correct [executor](https://www.nextflow.io/docs/latest/executor.html) (e.g. SGE, SLURM, ...)? 
Cluster-related options (i.e., all the lines starting with `clusterOptions`) should also be adapted to match the options of the selected executor. * Did you [specify to Singularity the path to the temporary directory](https://docs.sylabs.io/guides/3.5/user-guide/bind_paths_and_mounts.html)? It can be done by adjusting the parameter ```runOptions``` of singularity in Nextflow to ```--bind /absolute/path/to/tmp/folder```. To know the absolute path to the tmp folder in linux execute in the command line ```echo $TMPDIR``` diff --git a/docs/configuration_pipeline.md b/docs/pipeline_configuration.md similarity index 70% rename from docs/configuration_pipeline.md rename to docs/pipeline_configuration.md index 2422d88..12a42dd 100644 --- a/docs/configuration_pipeline.md +++ b/docs/pipeline_configuration.md @@ -6,10 +6,10 @@ This document contains an overview of the pipeline parameters. They should be de MINI-AC has 4 main inputs that need to be given as paths or folder names, with two them being optional: -* **ACR files**: Path of the folder with the BED files containing genomic coordinates corresponding to accessible chromatin regions (minimal format of 3 columns: chromosome, start, stop). This path should be given to the parameter ```ACR_dir```. -* **Output folder**: Path where the results will be stored. This path should be given to the parameter ```OutDir```. -* **(Optional) DEGs file**: Path of folder with tab-separated txt files with differential expression data associated with the input ACRs. First column must be gene ID. It can be one DEGs file per input ACR file, or paired DEGs files-ACR files. For more details see [inputs format example](../example/). This path should be given to the parameter ```DE_genes_dir```. -* **(Optional) Expressed genes file**: Path of folder with one-column txt files with gene IDs for genes expressed in the biological context of the input ACRs, to filter the inferred GRNs. 
It can be one Expression file per input ACR file, or paired Expressed genes files-ACR files. For more details see [inputs format example](../example/). This path should be given to the parameter ```Set_genes_dir```. +* **ACR files**: Path of the folder with the BED files containing genomic coordinates corresponding to accessible chromatin regions (minimal format of 3 columns: chromosome, start, stop). This path should be given to the parameter `ACR_dir`. +* **Output folder**: Path where the results will be stored. This path should be given to the parameter `OutDir`. +* **(Optional) DEGs file**: Path of folder with tab-separated txt files with differential expression data associated with the input ACRs. First column must be gene ID. It can be one DEGs file per input ACR file, or paired DEGs files-ACR files. For more details see [inputs format example](../example/). This path should be given to the parameter `DE_genes_dir`. +* **(Optional) Expressed genes file**: Path of folder with one-column txt files with gene IDs for genes expressed in the biological context of the input ACRs, to filter the inferred GRNs. It can be one Expression file per input ACR file, or paired Expressed genes files-ACR files. For more details see [inputs format example](../example/). This path should be given to the parameter `Set_genes_dir`. ## Input parameters @@ -17,22 +17,22 @@ MINI-AC has several optional parameters that affect the output and some aspects ### DEGs and Expressed genes files parameters -* **Species**: ```--species arabidopsis``` (command line) or ```species = "arabidopsis"``` (configuration file) for Arabidopsis, ```--species maize_v4``` (command line) or ```species = "maize_v4"``` (configuration file) for maize genome version 4, and ```--species maize_v5``` (command line) or ```species = "maize_v5"``` (configuration file) for maize genome version 5. 
+* **Species**: `--species arabidopsis` (command line) or `species = "arabidopsis"` (configuration file) for Arabidopsis, `--species maize_v4` (command line) or `species = "maize_v4"` (configuration file) for maize genome version 4, and `--species maize_v5` (command line) or `species = "maize_v5"` (configuration file) for maize genome version 5. -* **MINI-AC mode**: ```--mode genome_wide``` (command line) or ```mode = "genome_wide"``` (configuration file) for the genome-wide mode, and ```--mode locus_based``` (command line) or ```mode = "locus_based"``` (configuration file) for the locus-based mode. +* **MINI-AC mode**: `--mode genome_wide` (command line) or `mode = "genome_wide"` (configuration file) for the genome-wide mode, and `--mode locus_based` (command line) or `mode = "locus_based"` (configuration file) for the locus-based mode. -* **DEGs parameters**: Since providing DEGs files is optional, it needs to be specified if the path with the DEGs files is available with the parameter ```DE_genes``` set to ```DE_genes = true``` or ```DE_genes = false```. Additionally, if there is only one DEG file for all the input ACRs, you need set the parameter ```One_DE_set``` to ```One_DE_set = true```, and to ```One_DE_set = false``` if otherwise. +* **DEGs parameters**: Since providing DEGs files is optional, it needs to be specified if the path with the DEGs files is available with the parameter `DE_genes` set to `DE_genes = true` or `DE_genes = false`. Additionally, if there is only one DEG file for all the input ACRs, you need to set the parameter `One_DE_set` to `One_DE_set = true`, and to `One_DE_set = false` otherwise. -* **Expressed genes files parameters**: Since providing Expressed genes files is optional, it needs to be specified if the path with the Expressed genes files is available with the parameter ```Filter_set_genes``` set to ```Filter_set_genes = true``` or ```Filter_set_genes = false```. 
Additionally, if there is only one Expression file for all the input ACRs, you need set the parameter ```Filter_set_genes``` to ```Filter_set_genes = true```, and to ```Filter_set_genes = false``` if otherwise. +* **Expressed genes files parameters**: Since providing Expressed genes files is optional, it needs to be specified if the path with the Expressed genes files is available with the parameter `Filter_set_genes` set to `Filter_set_genes = true` or `Filter_set_genes = false`. Additionally, if there is only one Expression file for all the input ACRs, you need to set the parameter `One_filtering_set` to `One_filtering_set = true`, and to `One_filtering_set = false` otherwise. ### GRN inference parameters -* **Motif enrichment p-value cut-off**: This is the p-value cut-off that determines which motifs are enriched and used for GRN building. We do not recommend changing this parameter. It has been internally pre-defined for each MINI-AC mode based on the p-value cut-offs with a false discovery rate of 0 (see [publication](https://doi.org/10.1111/tpj.16483)). If wished, however, this p-value can be overwritten in the configuration file by setting the parameter ```P_val``` to whatever value (see below) or in the command line options. For example: ```nextflow -C mini_ac.config run mini_ac.nf --mode genome_wide --species maize_v4 --P_val 0.05``` +* **Motif enrichment p-value cut-off**: This is the p-value cut-off that determines which motifs are enriched and used for GRN building. We do not recommend changing this parameter. It has been internally pre-defined for each MINI-AC mode based on the p-value cut-offs with a false discovery rate of 0 (see [publication](https://doi.org/10.1111/tpj.16483)). If wished, however, this p-value can be overwritten in the configuration file by setting the parameter `P_val` to whatever value (see below) or in the command line options. 
For example: `nextflow -C mini_ac.config run mini_ac.nf --mode genome_wide --species maize_v4 --P_val 0.05` -* **Overlap criteria parameter**: By default, MINI-AC computes motif enrichment counting the motif matches within ACRs. This, however, is difficult if the ACRs are shorter than or of similar size to the motifs, which is the case of footprints. In this case, we observed that counting the absolute base-pair overlap is useful. Therefore, in case of using footprints or short ACRs (high resolution), we recommend setting the parameter ```Bps_intersect = true```. Otherwise it should be kept ```Bps_intersect = false```. +* **Overlap criteria parameter**: By default, MINI-AC computes motif enrichment counting the motif matches within ACRs. This, however, is difficult if the ACRs are shorter than or of similar size to the motifs, which is the case of footprints. In this case, we observed that counting the absolute base-pair overlap is useful. Therefore, in case of using footprints or short ACRs (high resolution), we recommend setting the parameter `Bps_intersect = true`. Otherwise it should be kept `Bps_intersect = false`. -* **Annotation of second closest gene in genome-wide mode**: The parameters ```Second_gene_annot``` and ```Second_gene_dist``` are only taken into account by the genome-wide mode. In the genome-wide mode the motif matches are annotated to the closest gene, but in genomes like maize, there are very distal regulatory elements that regulate non-neighboring genes. Although we showed in the original publication that this does not improve results, we give the possibility of annotating the second closest genes that are within a certain distance from the motif match. To activate this option the parameter ```Second_gene_annot``` should be set to ```Second_gene_annot = true```. 
If so, the parameter ```Second_gene_dist``` should be used to set the specific distance cut-off (in absolute base-pairs) at which the second-closest gene has to be from the motif match in order to be assigned as target gene. +* **Annotation of second closest gene in genome-wide mode**: The parameters `Second_gene_annot` and `Second_gene_dist` are only taken into account by the genome-wide mode. In the genome-wide mode the motif matches are annotated to the closest gene, but in genomes like maize, there are very distal regulatory elements that regulate non-neighboring genes. Although we showed in the original publication that this does not improve results, we give the possibility of annotating the second closest genes that are within a certain distance from the motif match. To activate this option the parameter `Second_gene_annot` should be set to `Second_gene_annot = true`. If so, the parameter `Second_gene_dist` should be used to set the specific distance cut-off (in absolute base-pairs) at which the second-closest gene has to be from the motif match in order to be assigned as target gene. ## **Nextflow configuration file** The configuration or "config" file, is a file that Nextflow uses to manage and specify the inputs and parameters settings of a pipeline. For more details read [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html). Here we review the main aspects of the configuration file when running MINI-AC. @@ -89,7 +89,7 @@ singularity { autoMounts = true } ``` -Sometimes the temporary directory used by Singularity is not in the same root path as in the pipeline, which can cause Singularity to struggle to find it. In this case, add the ```runOptions``` line below with the absolute path to the tmp folder. To know the absolute path to the tmp folder in linux execute in the command line ```echo $TMPDIR```. Then add it as shown below. 
+Sometimes the temporary directory used by Singularity is not in the same root path as in the pipeline, which can cause Singularity to struggle to find it. In this case, add the `runOptions` line below with the absolute path to the tmp folder. To know the absolute path to the tmp folder in linux execute in the command line `echo $TMPDIR`. Then add it as shown below. ```nextflow process.container = "vibpsb/mini-ac:latest" @@ -110,7 +110,7 @@ executor { } ``` -MINI-AC was developed in an SGE computer cluster, for which we used the configuration below. This was used to run the genome-wide mode on maize using an input dataset of ~600,000 MOA-seq peaks. For smaller datasets, the memory values can be further reduced. Addionally, for Arabidopsis, a species with a smaller genome, less memory can also be used. +MINI-AC was developed in an SGE computer cluster, for which we used the configuration below. This was used to run the genome-wide mode on maize using an input dataset of ~600,000 MOA-seq peaks. For smaller datasets, the memory values can be further reduced. Additionally, for Arabidopsis, a species with a smaller genome, less memory can also be used. ```nextflow executor { @@ -162,7 +162,7 @@ There are mainly two cases in which the user might want to alter the internal MI ### Modification of the motif mapping file for the locus-based mode of maize -By default, the maize MINI-AC locus-based mode (for both genome versions) runs on the "medium" non-coding genomic space, which corresponds, for each locus in the genome, to the 5kb upstream of the translation start site, the 1kb downstream of the translation end site, and the introns. 
However, we generated two additional motif mapping files for the locus-based mode of maize, that cover "large" (15kb upstream of the translation start site, the 2.5kb downstream of the translation end site, and the introns), and "small" (1kb upstream of the translation start site, the 1kb downstream of the translation end site, and the introns) non-coding genomic spaces. For Arabidopsis only the "medium" non-coding genomic space motif mapping file was generated because it already covers 73.5% of the whole non-coding genomic psace (see publication). To use these files, first they need to be downloaded, and then, the corresponding parameters of the motif mapping file (```MotMapsFile```) and the non-coding genomic space coordinates file (```Promoter_file```) should be modified either on the command line or in the configuration file. +By default, the maize MINI-AC locus-based mode (for both genome versions) runs on the "medium" non-coding genomic space, which corresponds, for each locus in the genome, to the 5kb upstream of the translation start site, the 1kb downstream of the translation end site, and the introns. However, we generated two additional motif mapping files for the locus-based mode of maize, that cover "large" (15kb upstream of the translation start site, the 2.5kb downstream of the translation end site, and the introns), and "small" (1kb upstream of the translation start site, the 1kb downstream of the translation end site, and the introns) non-coding genomic spaces. For Arabidopsis only the "medium" non-coding genomic space motif mapping file was generated because it already covers 73.5% of the whole non-coding genomic space (see publication). To use these files, first they need to be downloaded, and then, the corresponding parameters of the motif mapping file (`MotMapsFile`) and the non-coding genomic space coordinates file (`Promoter_file`) should be modified either on the command line or in the configuration file. 
To download the maize "large" motif mapping file and coordinates of the "large" non-coding genomic space: @@ -223,3 +223,42 @@ params { It is important, however, to make sure that the format is correct. The GO terms should be extended for parental terms, and this file should contain two tab-separated columns (no header), where the first column is the GO ID, and the second column is the gene ID, as shown [here](../data/zma_v4/zma_v4_go_gene_file.txt). It is vital that the gene IDs are either on Araport11 or AGPv4/NAM5.0. This same principle can also be applied to other parameters that the user wants to change. + +## iCREs-based MINI-AC configuration file + +The configuration file of iCREs-based MINI-AC has a similar structure and input parameters as regular MINI-AC (given that it runs genome-wide MINI-AC "under the hood"). The parameter `ACR_dir` should be replaced by `Gene_list_dir`. This parameter should be the path to a directory containing files in a ".txt" format, with each line containing a maize gene ID from the V4 or V5 genome version. One example can be found [here](../example/inputs/gene_set_files/UP_gene_set.txt). One GRN will be predicted for each input file. + +There is an additional input parameter named `--icres_set` that can either be `all` or `maxf1`. The value `all` uses a more comprehensive and complete collection of maize putative CREs, while `maxf1` uses a set of putative CREs that is smaller but more precise (fewer false positives). 
+ +One example of the parameters configuration from the file [mini_ac_icres.config](../mini_ac_icres.config) can be found below: + +```nextflow +params { + + //// Output folder + OutDir = "$projectDir/example/outputs_icres" + + //// Required input + Gene_list_dir = "$projectDir/example/inputs/gene_set_files" + + //// Optional input + // Differential expression data + DE_genes = false + DE_genes_dir = "$projectDir/example/inputs/de_files" + One_DE_set = true + // Expression data + Filter_set_genes = false + Set_genes_dir = "$projectDir/example/inputs/exp_genes_files" + One_filtering_set = true + + //// Prediction parameters + Bps_intersect = false + + + //// Prediction parameters only genome-wide + Second_gene_annot = false + Second_gene_dist = 500 +} +``` + +This version of MINI-AC can also be run with `DE_genes = true` and `Filter_set_genes = true`. However, the DEGs and expressed genes files should be named accordingly: the base name of the gene list file (without the `.txt` extension), followed by `_icres`, and then by `_degs_table.txt` and/or `_expressed_genes.txt`. For example, in the case of the input file [UP_gene_set.txt](../example/inputs/gene_set_files/UP_gene_set.txt), the corresponding DEGs and expressed genes files should be named `UP_gene_set_icres_degs_table.txt` and `UP_gene_set_icres_expressed_genes.txt`, respectively. \ No newline at end of file diff --git a/example/README.md b/example/README.md index b2ead4f..6d93681 100644 --- a/example/README.md +++ b/example/README.md @@ -165,3 +165,15 @@ The [OUTPUTS folder](outputs/) contains four sub-folders: - Maize gene name and Arabidopsis ortholog gene name combined. - (Optional; if expressed genes provided) True if the TF is present in the user-provided list of expressed genes, False otherwise. - (Optional; if DE table provided) Differential expression information. The first column is the gene ID, and the rest of columns depend on the content of the user-provided table in input folder "de_files". 
+ +## iCREs-based MINI-AC + +The outputs of the iCREs-based MINI-AC runs are identical to the default MINI-AC, as it can be seen in the folder [outputs_icres](outputs_icres) (not available until publication). However, two input parameters change: + +* Instead of providing an input BED file with genomic coordinates, the input should be a list of gene IDs from the version V4 or V5 of the maize genome, as in this [example](./inputs/gene_set_files/UP_gene_set.txt). + +* There is an additional input parameter named `--icres_set` that can either be `all` or `maxf1`. The parameter `all` uses a more comprehensive and complete collection of maize putative CREs, while `maxf1` uses a set of putative CREs that is smaller but more precise (less false positives). To download the files with the genomic coordinates of these two iCREs sets, the following commands should be executed on the **top-level directory of the repository**: + +```shell +NOT AVAILABLE UNTIL PUBLICATION +``` diff --git a/example/inputs/gene_set_files/DOWN_gene_set.txt b/example/inputs/gene_set_files/DOWN_gene_set.txt new file mode 100644 index 0000000..6b80a7c --- /dev/null +++ b/example/inputs/gene_set_files/DOWN_gene_set.txt @@ -0,0 +1,1702 @@ +Zm00001eb048680 +Zm00001eb252000 +Zm00001eb042740 +Zm00001eb113190 +Zm00001eb313500 +Zm00001eb372980 +Zm00001eb058360 +Zm00001eb104300 +Zm00001eb428680 +Zm00001eb088850 +Zm00001eb256330 +Zm00001eb260410 +Zm00001eb320890 +Zm00001eb104820 +Zm00001eb367770 +Zm00001eb367240 +Zm00001eb308430 +Zm00001eb348520 +Zm00001eb427620 +Zm00001eb433660 +Zm00001eb060890 +Zm00001eb280560 +Zm00001eb342970 +Zm00001eb025990 +Zm00001eb019420 +Zm00001eb154550 +Zm00001eb324090 +Zm00001eb417760 +Zm00001eb093090 +Zm00001eb406420 +Zm00001eb383830 +Zm00001eb161460 +Zm00001eb191220 +Zm00001eb306070 +Zm00001eb288290 +Zm00001eb270400 +Zm00001eb262550 +Zm00001eb216090 +Zm00001eb264250 +Zm00001eb000840 +Zm00001eb426520 +Zm00001eb325300 +Zm00001eb136280 +Zm00001eb136200 +Zm00001eb093670 
+Zm00001eb362510 +Zm00001eb302350 +Zm00001eb413300 +Zm00001eb350000 +Zm00001eb208350 +Zm00001eb392870 +Zm00001eb124210 +Zm00001eb056950 +Zm00001eb013360 +Zm00001eb212520 +Zm00001eb040140 +Zm00001eb385840 +Zm00001eb058790 +Zm00001eb141540 +Zm00001eb010110 +Zm00001eb053420 +Zm00001eb054050 +Zm00001eb386530 +Zm00001eb211600 +Zm00001eb292260 +Zm00001eb305600 +Zm00001eb403530 +Zm00001eb048020 +Zm00001eb388510 +Zm00001eb189890 +Zm00001eb379830 +Zm00001eb316320 +Zm00001eb120460 +Zm00001eb201350 +Zm00001eb407780 +Zm00001eb136840 +Zm00001eb313250 +Zm00001eb234470 +Zm00001eb125980 +Zm00001eb319950 +Zm00001eb423310 +Zm00001eb139850 +Zm00001eb431800 +Zm00001eb105800 +Zm00001eb010890 +Zm00001eb164960 +Zm00001eb393290 +Zm00001eb341960 +Zm00001eb106410 +Zm00001eb002190 +Zm00001eb338570 +Zm00001eb170440 +Zm00001eb420160 +Zm00001eb327190 +Zm00001eb412150 +Zm00001eb240590 +Zm00001eb135940 +Zm00001eb068470 +Zm00001eb298310 +Zm00001eb215680 +Zm00001eb200790 +Zm00001eb153630 +Zm00001eb036550 +Zm00001eb168840 +Zm00001eb321900 +Zm00001eb101450 +Zm00001eb227200 +Zm00001eb218170 +Zm00001eb125120 +Zm00001eb080890 +Zm00001eb267550 +Zm00001eb353270 +Zm00001eb190110 +Zm00001eb010660 +Zm00001eb188380 +Zm00001eb317430 +Zm00001eb229240 +Zm00001eb161390 +Zm00001eb433040 +Zm00001eb045750 +Zm00001eb425320 +Zm00001eb220940 +Zm00001eb386550 +Zm00001eb218080 +Zm00001eb234450 +Zm00001eb238170 +Zm00001eb003960 +Zm00001eb041760 +Zm00001eb204270 +Zm00001eb147030 +Zm00001eb352550 +Zm00001eb094890 +Zm00001eb303350 +Zm00001eb183220 +Zm00001eb320670 +Zm00001eb268650 +Zm00001eb235380 +Zm00001eb122620 +Zm00001eb007740 +Zm00001eb286540 +Zm00001eb251990 +Zm00001eb154600 +Zm00001eb091400 +Zm00001eb173840 +Zm00001eb187910 +Zm00001eb142230 +Zm00001eb098650 +Zm00001eb274840 +Zm00001eb040070 +Zm00001eb231510 +Zm00001eb077130 +Zm00001eb337110 +Zm00001eb332650 +Zm00001eb230270 +Zm00001eb330550 +Zm00001eb200270 +Zm00001eb413170 +Zm00001eb117130 +Zm00001eb040790 +Zm00001eb003450 +Zm00001eb086180 +Zm00001eb261760 
+Zm00001eb172170 +Zm00001eb328600 +Zm00001eb300630 +Zm00001eb237440 +Zm00001eb080320 +Zm00001eb062900 +Zm00001eb042220 +Zm00001eb381240 +Zm00001eb035580 +Zm00001eb053360 +Zm00001eb370610 +Zm00001eb299370 +Zm00001eb187050 +Zm00001eb284380 +Zm00001eb233760 +Zm00001eb127280 +Zm00001eb091250 +Zm00001eb113070 +Zm00001eb396390 +Zm00001eb180060 +Zm00001eb253280 +Zm00001eb008170 +Zm00001eb349760 +Zm00001eb054000 +Zm00001eb285500 +Zm00001eb042260 +Zm00001eb442430 +Zm00001eb078870 +Zm00001eb179080 +Zm00001eb314130 +Zm00001eb195030 +Zm00001eb010130 +Zm00001eb022980 +Zm00001eb073610 +Zm00001eb041860 +Zm00001eb250030 +Zm00001eb418040 +Zm00001eb068300 +Zm00001eb200230 +Zm00001eb298290 +Zm00001eb396720 +Zm00001eb017800 +Zm00001eb323410 +Zm00001eb206400 +Zm00001eb070510 +Zm00001eb039850 +Zm00001eb143910 +Zm00001eb217220 +Zm00001eb014930 +Zm00001eb406810 +Zm00001eb429330 +Zm00001eb281720 +Zm00001eb087570 +Zm00001eb254510 +Zm00001eb371840 +Zm00001eb122890 +Zm00001eb002740 +Zm00001eb117370 +Zm00001eb137420 +Zm00001eb410900 +Zm00001eb172390 +Zm00001eb075140 +Zm00001eb140910 +Zm00001eb312210 +Zm00001eb117890 +Zm00001eb349770 +Zm00001eb121780 +Zm00001eb405730 +Zm00001eb253380 +Zm00001eb414570 +Zm00001eb138650 +Zm00001eb296680 +Zm00001eb261260 +Zm00001eb013610 +Zm00001eb284890 +Zm00001eb316940 +Zm00001eb001970 +Zm00001eb314830 +Zm00001eb021010 +Zm00001eb288730 +Zm00001eb074370 +Zm00001eb120050 +Zm00001eb119570 +Zm00001eb347720 +Zm00001eb160210 +Zm00001eb233410 +Zm00001eb157310 +Zm00001eb248870 +Zm00001eb154850 +Zm00001eb058110 +Zm00001eb421950 +Zm00001eb038530 +Zm00001eb402600 +Zm00001eb330310 +Zm00001eb356910 +Zm00001eb428020 +Zm00001eb104260 +Zm00001eb197400 +Zm00001eb229580 +Zm00001eb017630 +Zm00001eb107560 +Zm00001eb212330 +Zm00001eb266690 +Zm00001eb348840 +Zm00001eb267560 +Zm00001eb434070 +Zm00001eb340760 +Zm00001eb113090 +Zm00001eb034810 +Zm00001eb228670 +Zm00001eb059460 +Zm00001eb261560 +Zm00001eb069320 +Zm00001eb216220 +Zm00001eb001120 +Zm00001eb173390 +Zm00001eb039810 
+Zm00001eb144350 +Zm00001eb239020 +Zm00001eb268500 +Zm00001eb243770 +Zm00001eb209570 +Zm00001eb406460 +Zm00001eb059970 +Zm00001eb159140 +Zm00001eb142370 +Zm00001eb143880 +Zm00001eb156820 +Zm00001eb072910 +Zm00001eb228700 +Zm00001eb057440 +Zm00001eb335560 +Zm00001eb133760 +Zm00001eb322740 +Zm00001eb229960 +Zm00001eb002950 +Zm00001eb201740 +Zm00001eb433900 +Zm00001eb158960 +Zm00001eb207870 +Zm00001eb204410 +Zm00001eb191420 +Zm00001eb249290 +Zm00001eb037930 +Zm00001eb260710 +Zm00001eb386080 +Zm00001eb263160 +Zm00001eb164750 +Zm00001eb188120 +Zm00001eb336030 +Zm00001eb072370 +Zm00001eb070520 +Zm00001eb299480 +Zm00001eb296170 +Zm00001eb146930 +Zm00001eb102290 +Zm00001eb314090 +Zm00001eb101710 +Zm00001eb234930 +Zm00001eb361620 +Zm00001eb388100 +Zm00001eb058480 +Zm00001eb328310 +Zm00001eb147290 +Zm00001eb433340 +Zm00001eb244190 +Zm00001eb431360 +Zm00001eb284690 +Zm00001eb181800 +Zm00001eb118990 +Zm00001eb338940 +Zm00001eb009250 +Zm00001eb205940 +Zm00001eb023950 +Zm00001eb377890 +Zm00001eb237580 +Zm00001eb123000 +Zm00001eb173250 +Zm00001eb286710 +Zm00001eb362760 +Zm00001eb216410 +Zm00001eb095030 +Zm00001eb093430 +Zm00001eb126660 +Zm00001eb189450 +Zm00001eb315460 +Zm00001eb313970 +Zm00001eb299010 +Zm00001eb139880 +Zm00001eb082290 +Zm00001eb267020 +Zm00001eb169330 +Zm00001eb282410 +Zm00001eb257950 +Zm00001eb277570 +Zm00001eb159910 +Zm00001eb153000 +Zm00001eb273380 +Zm00001eb332300 +Zm00001eb130790 +Zm00001eb386480 +Zm00001eb319770 +Zm00001eb245920 +Zm00001eb187260 +Zm00001eb246810 +Zm00001eb159210 +Zm00001eb272090 +Zm00001eb169560 +Zm00001eb287450 +Zm00001eb083840 +Zm00001eb034290 +Zm00001eb106840 +Zm00001eb123090 +Zm00001eb392450 +Zm00001eb426920 +Zm00001eb242690 +Zm00001eb370870 +Zm00001eb244390 +Zm00001eb280600 +Zm00001eb252800 +Zm00001eb326840 +Zm00001eb345070 +Zm00001eb157710 +Zm00001eb146390 +Zm00001eb188360 +Zm00001eb169750 +Zm00001eb207660 +Zm00001eb166630 +Zm00001eb162710 +Zm00001eb022580 +Zm00001eb357740 +Zm00001eb013370 +Zm00001eb366090 +Zm00001eb330540 
+Zm00001eb234460 +Zm00001eb146910 +Zm00001eb356630 +Zm00001eb412420 +Zm00001eb397570 +Zm00001eb389560 +Zm00001eb101660 +Zm00001eb115150 +Zm00001eb293010 +Zm00001eb323460 +Zm00001eb285920 +Zm00001eb168140 +Zm00001eb076240 +Zm00001eb261830 +Zm00001eb406430 +Zm00001eb376490 +Zm00001eb298270 +Zm00001eb151330 +Zm00001eb235950 +Zm00001eb261510 +Zm00001eb163780 +Zm00001eb071970 +Zm00001eb217190 +Zm00001eb369060 +Zm00001eb018040 +Zm00001eb102880 +Zm00001eb429870 +Zm00001eb172450 +Zm00001eb406570 +Zm00001eb013780 +Zm00001eb219830 +Zm00001eb064530 +Zm00001eb265080 +Zm00001eb058590 +Zm00001eb158260 +Zm00001eb059240 +Zm00001eb282810 +Zm00001eb271860 +Zm00001eb031100 +Zm00001eb024870 +Zm00001eb213570 +Zm00001eb300670 +Zm00001eb203660 +Zm00001eb323200 +Zm00001eb034650 +Zm00001eb076070 +Zm00001eb033390 +Zm00001eb112750 +Zm00001eb251520 +Zm00001eb395660 +Zm00001eb256380 +Zm00001eb161140 +Zm00001eb196320 +Zm00001eb145680 +Zm00001eb313660 +Zm00001eb182130 +Zm00001eb019920 +Zm00001eb054810 +Zm00001eb200930 +Zm00001eb042990 +Zm00001eb011070 +Zm00001eb073590 +Zm00001eb197320 +Zm00001eb309040 +Zm00001eb118350 +Zm00001eb416900 +Zm00001eb326850 +Zm00001eb175940 +Zm00001eb220390 +Zm00001eb165000 +Zm00001eb371490 +Zm00001eb312640 +Zm00001eb227020 +Zm00001eb381290 +Zm00001eb394170 +Zm00001eb393680 +Zm00001eb325410 +Zm00001eb174860 +Zm00001eb299420 +Zm00001eb358400 +Zm00001eb143320 +Zm00001eb161790 +Zm00001eb245650 +Zm00001eb032890 +Zm00001eb040580 +Zm00001eb376530 +Zm00001eb041770 +Zm00001eb433500 +Zm00001eb016180 +Zm00001eb144960 +Zm00001eb182980 +Zm00001eb396990 +Zm00001eb323400 +Zm00001eb247430 +Zm00001eb214080 +Zm00001eb371980 +Zm00001eb080840 +Zm00001eb254150 +Zm00001eb370480 +Zm00001eb163190 +Zm00001eb127030 +Zm00001eb106430 +Zm00001eb203630 +Zm00001eb081810 +Zm00001eb344100 +Zm00001eb126330 +Zm00001eb005330 +Zm00001eb242120 +Zm00001eb009510 +Zm00001eb379570 +Zm00001eb298320 +Zm00001eb183690 +Zm00001eb278470 +Zm00001eb017450 +Zm00001eb296280 +Zm00001eb261620 +Zm00001eb254990 
+Zm00001eb076470 +Zm00001eb310370 +Zm00001eb372600 +Zm00001eb042720 +Zm00001eb432100 +Zm00001eb305300 +Zm00001eb046560 +Zm00001eb232250 +Zm00001eb150790 +Zm00001eb045490 +Zm00001eb240100 +Zm00001eb431560 +Zm00001eb047650 +Zm00001eb347850 +Zm00001eb322950 +Zm00001eb043120 +Zm00001eb124900 +Zm00001eb044420 +Zm00001eb063890 +Zm00001eb223960 +Zm00001eb151160 +Zm00001eb068280 +Zm00001eb402240 +Zm00001eb008810 +Zm00001eb035010 +Zm00001eb239700 +Zm00001eb430720 +Zm00001eb332100 +Zm00001eb138250 +Zm00001eb417940 +Zm00001eb075420 +Zm00001eb409190 +Zm00001eb159020 +Zm00001eb168550 +Zm00001eb118730 +Zm00001eb151510 +Zm00001eb359280 +Zm00001eb141060 +Zm00001eb170510 +Zm00001eb279820 +Zm00001eb179680 +Zm00001eb415250 +Zm00001eb303060 +Zm00001eb381250 +Zm00001eb144530 +Zm00001eb284070 +Zm00001eb146920 +Zm00001eb074400 +Zm00001eb166740 +Zm00001eb231110 +Zm00001eb146750 +Zm00001eb001950 +Zm00001eb413540 +Zm00001eb207670 +Zm00001eb033650 +Zm00001eb046960 +Zm00001eb089390 +Zm00001eb373490 +Zm00001eb332560 +Zm00001eb145890 +Zm00001eb320280 +Zm00001eb340750 +Zm00001eb408010 +Zm00001eb246190 +Zm00001eb379170 +Zm00001eb389670 +Zm00001eb289000 +Zm00001eb376570 +Zm00001eb155060 +Zm00001eb405190 +Zm00001eb069310 +Zm00001eb254390 +Zm00001eb282840 +Zm00001eb317510 +Zm00001eb013880 +Zm00001eb008940 +Zm00001eb294720 +Zm00001eb421850 +Zm00001eb106380 +Zm00001eb042970 +Zm00001eb390480 +Zm00001eb216070 +Zm00001eb069410 +Zm00001eb013340 +Zm00001eb275520 +Zm00001eb291670 +Zm00001eb211860 +Zm00001eb379090 +Zm00001eb107080 +Zm00001eb332630 +Zm00001eb254540 +Zm00001eb102540 +Zm00001eb270150 +Zm00001eb066060 +Zm00001eb056870 +Zm00001eb106420 +Zm00001eb419500 +Zm00001eb297620 +Zm00001eb260140 +Zm00001eb139590 +Zm00001eb080160 +Zm00001eb359070 +Zm00001eb298350 +Zm00001eb432920 +Zm00001eb283450 +Zm00001eb421270 +Zm00001eb306140 +Zm00001eb209070 +Zm00001eb332400 +Zm00001eb153360 +Zm00001eb105580 +Zm00001eb175860 +Zm00001eb122820 +Zm00001eb128920 +Zm00001eb329580 +Zm00001eb054430 +Zm00001eb056260 
+Zm00001eb107520 +Zm00001eb187380 +Zm00001eb406080 +Zm00001eb147530 +Zm00001eb172290 +Zm00001eb188240 +Zm00001eb021970 +Zm00001eb036910 +Zm00001eb295610 +Zm00001eb098970 +Zm00001eb339420 +Zm00001eb261910 +Zm00001eb016200 +Zm00001eb371930 +Zm00001eb157410 +Zm00001eb140320 +Zm00001eb118360 +Zm00001eb426180 +Zm00001eb002620 +Zm00001eb058050 +Zm00001eb364620 +Zm00001eb066100 +Zm00001eb070910 +Zm00001eb211080 +Zm00001eb428630 +Zm00001eb316290 +Zm00001eb394280 +Zm00001eb034370 +Zm00001eb078010 +Zm00001eb033870 +Zm00001eb027860 +Zm00001eb122190 +Zm00001eb130400 +Zm00001eb059550 +Zm00001eb080680 +Zm00001eb405720 +Zm00001eb122240 +Zm00001eb275710 +Zm00001eb016470 +Zm00001eb100960 +Zm00001eb367300 +Zm00001eb394290 +Zm00001eb275420 +Zm00001eb379340 +Zm00001eb073340 +Zm00001eb019640 +Zm00001eb323340 +Zm00001eb386410 +Zm00001eb015950 +Zm00001eb188280 +Zm00001eb037440 +Zm00001eb320680 +Zm00001eb284390 +Zm00001eb129000 +Zm00001eb102260 +Zm00001eb227320 +Zm00001eb099810 +Zm00001eb025910 +Zm00001eb343850 +Zm00001eb311230 +Zm00001eb114700 +Zm00001eb283560 +Zm00001eb239670 +Zm00001eb041700 +Zm00001eb352210 +Zm00001eb045010 +Zm00001eb268320 +Zm00001eb383280 +Zm00001eb363330 +Zm00001eb325650 +Zm00001eb248960 +Zm00001eb038890 +Zm00001eb166750 +Zm00001eb185470 +Zm00001eb163720 +Zm00001eb421790 +Zm00001eb427460 +Zm00001eb335390 +Zm00001eb113200 +Zm00001eb226760 +Zm00001eb345910 +Zm00001eb260160 +Zm00001eb202020 +Zm00001eb008070 +Zm00001eb022810 +Zm00001eb433610 +Zm00001eb152390 +Zm00001eb056210 +Zm00001eb111670 +Zm00001eb260150 +Zm00001eb311920 +Zm00001eb220820 +Zm00001eb052520 +Zm00001eb127880 +Zm00001eb340550 +Zm00001eb157880 +Zm00001eb077580 +Zm00001eb158810 +Zm00001eb111130 +Zm00001eb324180 +Zm00001eb425800 +Zm00001eb154790 +Zm00001eb009460 +Zm00001eb165410 +Zm00001eb018250 +Zm00001eb365590 +Zm00001eb418750 +Zm00001eb336810 +Zm00001eb012470 +Zm00001eb313320 +Zm00001eb256950 +Zm00001eb077220 +Zm00001eb284520 +Zm00001eb300960 +Zm00001eb173500 +Zm00001eb275080 +Zm00001eb047940 
+Zm00001eb189470 +Zm00001eb367150 +Zm00001eb082890 +Zm00001eb347320 +Zm00001eb196500 +Zm00001eb395970 +Zm00001eb230250 +Zm00001eb144580 +Zm00001eb109290 +Zm00001eb159610 +Zm00001eb244570 +Zm00001eb138100 +Zm00001eb093890 +Zm00001eb251040 +Zm00001eb277780 +Zm00001eb185260 +Zm00001eb016920 +Zm00001eb147750 +Zm00001eb006410 +Zm00001eb083520 +Zm00001eb252160 +Zm00001eb359900 +Zm00001eb315140 +Zm00001eb364690 +Zm00001eb083690 +Zm00001eb266570 +Zm00001eb034610 +Zm00001eb281090 +Zm00001eb226020 +Zm00001eb390820 +Zm00001eb103310 +Zm00001eb377870 +Zm00001eb163860 +Zm00001eb052650 +Zm00001eb259370 +Zm00001eb169520 +Zm00001eb032850 +Zm00001eb039500 +Zm00001eb410930 +Zm00001eb087720 +Zm00001eb072900 +Zm00001eb228100 +Zm00001eb020600 +Zm00001eb031210 +Zm00001eb212360 +Zm00001eb285960 +Zm00001eb288550 +Zm00001eb165560 +Zm00001eb044020 +Zm00001eb077490 +Zm00001eb072720 +Zm00001eb354680 +Zm00001eb388220 +Zm00001eb190090 +Zm00001eb291630 +Zm00001eb324740 +Zm00001eb273420 +Zm00001eb146780 +Zm00001eb339490 +Zm00001eb315060 +Zm00001eb384280 +Zm00001eb043810 +Zm00001eb006180 +Zm00001eb330460 +Zm00001eb301100 +Zm00001eb309340 +Zm00001eb106810 +Zm00001eb149170 +Zm00001eb105190 +Zm00001eb029470 +Zm00001eb282040 +Zm00001eb043610 +Zm00001eb010440 +Zm00001eb236640 +Zm00001eb032600 +Zm00001eb203860 +Zm00001eb260610 +Zm00001eb318610 +Zm00001eb311150 +Zm00001eb121020 +Zm00001eb405820 +Zm00001eb245960 +Zm00001eb057750 +Zm00001eb399120 +Zm00001eb169010 +Zm00001eb283530 +Zm00001eb246520 +Zm00001eb335670 +Zm00001eb103020 +Zm00001eb332210 +Zm00001eb072600 +Zm00001eb138680 +Zm00001eb358880 +Zm00001eb078490 +Zm00001eb318080 +Zm00001eb080540 +Zm00001eb067840 +Zm00001eb286440 +Zm00001eb170300 +Zm00001eb260220 +Zm00001eb112260 +Zm00001eb196880 +Zm00001eb268760 +Zm00001eb108910 +Zm00001eb134240 +Zm00001eb426240 +Zm00001eb225670 +Zm00001eb278310 +Zm00001eb249100 +Zm00001eb042780 +Zm00001eb131150 +Zm00001eb357910 +Zm00001eb398500 +Zm00001eb170920 +Zm00001eb204960 +Zm00001eb234390 +Zm00001eb170060 
+Zm00001eb359110 +Zm00001eb240490 +Zm00001eb033760 +Zm00001eb396520 +Zm00001eb364830 +Zm00001eb354260 +Zm00001eb142810 +Zm00001eb199200 +Zm00001eb395510 +Zm00001eb289770 +Zm00001eb153340 +Zm00001eb401330 +Zm00001eb067250 +Zm00001eb174880 +Zm00001eb336850 +Zm00001eb005760 +Zm00001eb286830 +Zm00001eb346420 +Zm00001eb015820 +Zm00001eb316520 +Zm00001eb016390 +Zm00001eb425050 +Zm00001eb170180 +Zm00001eb158000 +Zm00001eb398840 +Zm00001eb321140 +Zm00001eb049040 +Zm00001eb363310 +Zm00001eb291380 +Zm00001eb418860 +Zm00001eb032280 +Zm00001eb173100 +Zm00001eb312450 +Zm00001eb077240 +Zm00001eb322810 +Zm00001eb159850 +Zm00001eb336410 +Zm00001eb229250 +Zm00001eb053090 +Zm00001eb271820 +Zm00001eb226280 +Zm00001eb298040 +Zm00001eb347530 +Zm00001eb198660 +Zm00001eb081060 +Zm00001eb211260 +Zm00001eb042800 +Zm00001eb243730 +Zm00001eb154990 +Zm00001eb234530 +Zm00001eb048930 +Zm00001eb127870 +Zm00001eb368990 +Zm00001eb006470 +Zm00001eb287940 +Zm00001eb334540 +Zm00001eb344000 +Zm00001eb161410 +Zm00001eb313760 +Zm00001eb009660 +Zm00001eb101770 +Zm00001eb383680 +Zm00001eb013990 +Zm00001eb200260 +Zm00001eb426190 +Zm00001eb173510 +Zm00001eb143310 +Zm00001eb107870 +Zm00001eb175600 +Zm00001eb061670 +Zm00001eb034520 +Zm00001eb389210 +Zm00001eb284350 +Zm00001eb100360 +Zm00001eb327810 +Zm00001eb091360 +Zm00001eb275090 +Zm00001eb193970 +Zm00001eb041050 +Zm00001eb186570 +Zm00001eb330300 +Zm00001eb350800 +Zm00001eb203770 +Zm00001eb321030 +Zm00001eb214960 +Zm00001eb016080 +Zm00001eb041460 +Zm00001eb048510 +Zm00001eb050970 +Zm00001eb023280 +Zm00001eb417150 +Zm00001eb280970 +Zm00001eb280930 +Zm00001eb211050 +Zm00001eb025140 +Zm00001eb205110 +Zm00001eb196240 +Zm00001eb244560 +Zm00001eb339340 +Zm00001eb078770 +Zm00001eb131870 +Zm00001eb061410 +Zm00001eb295060 +Zm00001eb302400 +Zm00001eb113350 +Zm00001eb020060 +Zm00001eb402430 +Zm00001eb334110 +Zm00001eb072580 +Zm00001eb288190 +Zm00001eb212010 +Zm00001eb194080 +Zm00001eb370450 +Zm00001eb073250 +Zm00001eb087860 +Zm00001eb417790 +Zm00001eb306700 
+Zm00001eb394260 +Zm00001eb346280 +Zm00001eb340500 +Zm00001eb323420 +Zm00001eb388240 +Zm00001eb012800 +Zm00001eb404050 +Zm00001eb250590 +Zm00001eb138340 +Zm00001eb146770 +Zm00001eb147400 +Zm00001eb002760 +Zm00001eb426320 +Zm00001eb399520 +Zm00001eb290990 +Zm00001eb256650 +Zm00001eb145860 +Zm00001eb392420 +Zm00001eb154650 +Zm00001eb189920 +Zm00001eb098640 +Zm00001eb243700 +Zm00001eb064340 +Zm00001eb232910 +Zm00001eb208810 +Zm00001eb050610 +Zm00001eb410040 +Zm00001eb377880 +Zm00001eb398880 +Zm00001eb033080 +Zm00001eb312890 +Zm00001eb324230 +Zm00001eb279810 +Zm00001eb326660 +Zm00001eb039650 +Zm00001eb385070 +Zm00001eb200910 +Zm00001eb070840 +Zm00001eb070230 +Zm00001eb292200 +Zm00001eb273000 +Zm00001eb114370 +Zm00001eb408520 +Zm00001eb119760 +Zm00001eb427310 +Zm00001eb359410 +Zm00001eb242760 +Zm00001eb369930 +Zm00001eb231210 +Zm00001eb160550 +Zm00001eb201150 +Zm00001eb084430 +Zm00001eb011560 +Zm00001eb034220 +Zm00001eb145600 +Zm00001eb306400 +Zm00001eb111420 +Zm00001eb258850 +Zm00001eb247150 +Zm00001eb233170 +Zm00001eb344040 +Zm00001eb046710 +Zm00001eb316910 +Zm00001eb051620 +Zm00001eb367000 +Zm00001eb141530 +Zm00001eb370130 +Zm00001eb162630 +Zm00001eb374630 +Zm00001eb073640 +Zm00001eb066570 +Zm00001eb103380 +Zm00001eb386750 +Zm00001eb267210 +Zm00001eb344730 +Zm00001eb125060 +Zm00001eb250830 +Zm00001eb027940 +Zm00001eb290050 +Zm00001eb000750 +Zm00001eb136720 +Zm00001eb129970 +Zm00001eb264590 +Zm00001eb209360 +Zm00001eb078340 +Zm00001eb347770 +Zm00001eb413520 +Zm00001eb310440 +Zm00001eb122660 +Zm00001eb430380 +Zm00001eb322880 +Zm00001eb320950 +Zm00001eb339000 +Zm00001eb299980 +Zm00001eb228000 +Zm00001eb012750 +Zm00001eb001650 +Zm00001eb116470 +Zm00001eb217500 +Zm00001eb013480 +Zm00001eb290480 +Zm00001eb147120 +Zm00001eb064780 +Zm00001eb377440 +Zm00001eb164730 +Zm00001eb047660 +Zm00001eb338860 +Zm00001eb352400 +Zm00001eb011380 +Zm00001eb088800 +Zm00001eb187950 +Zm00001eb103250 +Zm00001eb021710 +Zm00001eb011940 +Zm00001eb404120 +Zm00001eb429440 +Zm00001eb127490 
+Zm00001eb299020 +Zm00001eb319210 +Zm00001eb213420 +Zm00001eb092540 +Zm00001eb287770 +Zm00001eb392080 +Zm00001eb375120 +Zm00001eb240650 +Zm00001eb018270 +Zm00001eb263000 +Zm00001eb290700 +Zm00001eb420190 +Zm00001eb395130 +Zm00001eb095550 +Zm00001eb017330 +Zm00001eb068160 +Zm00001eb106160 +Zm00001eb129850 +Zm00001eb223890 +Zm00001eb331630 +Zm00001eb061730 +Zm00001eb177750 +Zm00001eb137110 +Zm00001eb018750 +Zm00001eb143090 +Zm00001eb038180 +Zm00001eb207130 +Zm00001eb276540 +Zm00001eb361760 +Zm00001eb400390 +Zm00001eb108660 +Zm00001eb396100 +Zm00001eb284170 +Zm00001eb021050 +Zm00001eb134700 +Zm00001eb079430 +Zm00001eb111790 +Zm00001eb010770 +Zm00001eb249020 +Zm00001eb387350 +Zm00001eb333200 +Zm00001eb050930 +Zm00001eb088840 +Zm00001eb296690 +Zm00001eb214410 +Zm00001eb245320 +Zm00001eb195210 +Zm00001eb251170 +Zm00001eb145340 +Zm00001eb069630 +Zm00001eb299930 +Zm00001eb397810 +Zm00001eb148310 +Zm00001eb156830 +Zm00001eb002470 +Zm00001eb120310 +Zm00001eb260800 +Zm00001eb216120 +Zm00001eb202360 +Zm00001eb427190 +Zm00001eb165900 +Zm00001eb345250 +Zm00001eb400430 +Zm00001eb249150 +Zm00001eb387040 +Zm00001eb004050 +Zm00001eb355370 +Zm00001eb056030 +Zm00001eb422400 +Zm00001eb008800 +Zm00001eb127040 +Zm00001eb369970 +Zm00001eb378720 +Zm00001eb274380 +Zm00001eb004670 +Zm00001eb042410 +Zm00001eb264170 +Zm00001eb242150 +Zm00001eb106070 +Zm00001eb409580 +Zm00001eb349810 +Zm00001eb138310 +Zm00001eb182740 +Zm00001eb404690 +Zm00001eb032650 +Zm00001eb112950 +Zm00001eb249090 +Zm00001eb313530 +Zm00001eb172590 +Zm00001eb047830 +Zm00001eb204630 +Zm00001eb348300 +Zm00001eb433840 +Zm00001eb426980 +Zm00001eb092610 +Zm00001eb281630 +Zm00001eb187430 +Zm00001eb234720 +Zm00001eb405380 +Zm00001eb388200 +Zm00001eb198190 +Zm00001eb289470 +Zm00001eb081760 +Zm00001eb286760 +Zm00001eb161980 +Zm00001eb290460 +Zm00001eb010320 +Zm00001eb313510 +Zm00001eb428940 +Zm00001eb033440 +Zm00001eb316180 +Zm00001eb155550 +Zm00001eb151430 +Zm00001eb397580 +Zm00001eb310330 +Zm00001eb222030 +Zm00001eb346450 
+Zm00001eb074080 +Zm00001eb241730 +Zm00001eb016410 +Zm00001eb039430 +Zm00001eb276150 +Zm00001eb008920 +Zm00001eb077890 +Zm00001eb193820 +Zm00001eb252030 +Zm00001eb145030 +Zm00001eb076900 +Zm00001eb260010 +Zm00001eb361960 +Zm00001eb380530 +Zm00001eb257940 +Zm00001eb346880 +Zm00001eb297410 +Zm00001eb338810 +Zm00001eb083950 +Zm00001eb088500 +Zm00001eb386210 +Zm00001eb356720 +Zm00001eb056340 +Zm00001eb261490 +Zm00001eb100720 +Zm00001eb384100 +Zm00001eb217170 +Zm00001eb035340 +Zm00001eb034740 +Zm00001eb002660 +Zm00001eb340160 +Zm00001eb197410 +Zm00001eb405290 +Zm00001eb011100 +Zm00001eb055330 +Zm00001eb264870 +Zm00001eb174590 +Zm00001eb366810 +Zm00001eb209990 +Zm00001eb273230 +Zm00001eb248170 +Zm00001eb123240 +Zm00001eb118250 +Zm00001eb011080 +Zm00001eb135250 +Zm00001eb272040 +Zm00001eb012570 +Zm00001eb375460 +Zm00001eb175230 +Zm00001eb389660 +Zm00001eb187850 +Zm00001eb131190 +Zm00001eb365040 +Zm00001eb018600 +Zm00001eb138290 +Zm00001eb058100 +Zm00001eb017670 +Zm00001eb347270 +Zm00001eb315150 +Zm00001eb175890 +Zm00001eb083980 +Zm00001eb293450 +Zm00001eb106090 +Zm00001eb033880 +Zm00001eb129010 +Zm00001eb231540 +Zm00001eb410260 +Zm00001eb312390 +Zm00001eb368640 +Zm00001eb247660 +Zm00001eb212930 +Zm00001eb023600 +Zm00001eb391720 +Zm00001eb124740 +Zm00001eb003200 +Zm00001eb362560 +Zm00001eb017120 +Zm00001eb386660 +Zm00001eb013890 +Zm00001eb022920 +Zm00001eb064990 +Zm00001eb081570 +Zm00001eb352450 +Zm00001eb238230 +Zm00001eb155650 +Zm00001eb353280 +Zm00001eb071710 +Zm00001eb131840 +Zm00001eb294020 +Zm00001eb321670 +Zm00001eb414810 +Zm00001eb410450 +Zm00001eb054390 +Zm00001eb141870 +Zm00001eb397060 +Zm00001eb323550 +Zm00001eb334490 +Zm00001eb346520 +Zm00001eb115060 +Zm00001eb130730 +Zm00001eb134020 +Zm00001eb176470 +Zm00001eb195720 +Zm00001eb163440 +Zm00001eb323370 +Zm00001eb262840 +Zm00001eb199410 +Zm00001eb339120 +Zm00001eb257570 +Zm00001eb070250 +Zm00001eb062040 +Zm00001eb289300 +Zm00001eb240340 +Zm00001eb189510 +Zm00001eb008930 +Zm00001eb280210 +Zm00001eb223590 
+Zm00001eb396710 +Zm00001eb362640 +Zm00001eb280920 +Zm00001eb131350 +Zm00001eb417630 +Zm00001eb066320 +Zm00001eb194350 +Zm00001eb037540 +Zm00001eb000240 +Zm00001eb129040 +Zm00001eb071770 +Zm00001eb403180 +Zm00001eb349490 +Zm00001eb168290 +Zm00001eb100050 +Zm00001eb125630 +Zm00001eb121410 +Zm00001eb285770 +Zm00001eb269720 +Zm00001eb250600 +Zm00001eb268400 +Zm00001eb001140 +Zm00001eb242740 +Zm00001eb368160 +Zm00001eb048920 +Zm00001eb063160 +Zm00001eb289190 +Zm00001eb102800 +Zm00001eb184710 +Zm00001eb004410 +Zm00001eb219680 +Zm00001eb405150 +Zm00001eb288580 +Zm00001eb071980 +Zm00001eb308980 +Zm00001eb232420 +Zm00001eb083680 +Zm00001eb244840 +Zm00001eb384990 +Zm00001eb413010 +Zm00001eb426370 +Zm00001eb156030 +Zm00001eb106390 +Zm00001eb075130 +Zm00001eb359240 +Zm00001eb196560 +Zm00001eb067620 +Zm00001eb008420 +Zm00001eb045690 +Zm00001eb165310 +Zm00001eb144010 +Zm00001eb162310 +Zm00001eb014490 +Zm00001eb119940 +Zm00001eb043700 +Zm00001eb324240 +Zm00001eb198020 +Zm00001eb069550 +Zm00001eb025490 +Zm00001eb288890 +Zm00001eb123280 +Zm00001eb068310 +Zm00001eb399140 +Zm00001eb050630 +Zm00001eb065740 +Zm00001eb291320 +Zm00001eb175400 +Zm00001eb372490 +Zm00001eb077030 +Zm00001eb210000 +Zm00001eb180080 +Zm00001eb101130 +Zm00001eb118870 +Zm00001eb383160 +Zm00001eb072030 +Zm00001eb020740 +Zm00001eb186560 +Zm00001eb071870 +Zm00001eb110630 +Zm00001eb418090 +Zm00001eb319610 +Zm00001eb048410 +Zm00001eb168100 +Zm00001eb229440 +Zm00001eb114610 +Zm00001eb050140 +Zm00001eb243310 +Zm00001eb013060 +Zm00001eb127300 +Zm00001eb333050 +Zm00001eb059950 +Zm00001eb172020 +Zm00001eb142480 +Zm00001eb298010 +Zm00001eb355340 +Zm00001eb245670 +Zm00001eb117810 +Zm00001eb111800 +Zm00001eb381280 +Zm00001eb320600 +Zm00001eb313350 +Zm00001eb222590 +Zm00001eb408500 +Zm00001eb371280 +Zm00001eb202080 +Zm00001eb279030 +Zm00001eb293110 +Zm00001eb256600 +Zm00001eb019840 +Zm00001eb051950 +Zm00001eb023350 +Zm00001eb077910 +Zm00001eb102910 +Zm00001eb244600 +Zm00001eb361110 +Zm00001eb044540 +Zm00001eb090830 
+Zm00001eb259070 +Zm00001eb017930 +Zm00001eb154120 +Zm00001eb044250 +Zm00001eb316460 +Zm00001eb167720 +Zm00001eb298560 +Zm00001eb423900 +Zm00001eb097160 +Zm00001eb369480 +Zm00001eb297490 +Zm00001eb353230 +Zm00001eb069010 +Zm00001eb111390 +Zm00001eb149270 +Zm00001eb167710 +Zm00001eb041120 +Zm00001eb238370 +Zm00001eb029540 +Zm00001eb168380 +Zm00001eb425540 +Zm00001eb246600 +Zm00001eb268360 +Zm00001eb410630 +Zm00001eb222660 +Zm00001eb005550 +Zm00001eb356300 +Zm00001eb059150 +Zm00001eb122680 +Zm00001eb347020 +Zm00001eb096870 +Zm00001eb317690 +Zm00001eb264210 +Zm00001eb431240 +Zm00001eb276790 +Zm00001eb299760 +Zm00001eb164390 +Zm00001eb327210 +Zm00001eb127310 +Zm00001eb394430 +Zm00001eb100690 +Zm00001eb297390 +Zm00001eb161400 +Zm00001eb043800 +Zm00001eb215400 +Zm00001eb244980 +Zm00001eb336580 +Zm00001eb292840 +Zm00001eb317540 +Zm00001eb208900 +Zm00001eb213110 +Zm00001eb258100 +Zm00001eb216040 +Zm00001eb019050 +Zm00001eb241570 +Zm00001eb084680 +Zm00001eb043900 +Zm00001eb433540 +Zm00001eb429260 +Zm00001eb323350 +Zm00001eb141430 +Zm00001eb379950 +Zm00001eb433870 +Zm00001eb333260 +Zm00001eb062390 +Zm00001eb016830 +Zm00001eb120240 +Zm00001eb083210 +Zm00001eb014500 +Zm00001eb278510 +Zm00001eb371800 +Zm00001eb363220 +Zm00001eb165580 +Zm00001eb071050 +Zm00001eb238620 +Zm00001eb049640 +Zm00001eb397520 +Zm00001eb184670 +Zm00001eb322600 +Zm00001eb164860 +Zm00001eb084290 +Zm00001eb259950 +Zm00001eb314610 +Zm00001eb000930 +Zm00001eb040280 +Zm00001eb059100 +Zm00001eb302310 +Zm00001eb095750 +Zm00001eb405030 +Zm00001eb062850 +Zm00001eb425100 +Zm00001eb185980 +Zm00001eb166680 +Zm00001eb336020 +Zm00001eb283230 +Zm00001eb074330 +Zm00001eb226330 +Zm00001eb329570 +Zm00001eb003210 +Zm00001eb091640 +Zm00001eb077930 +Zm00001eb103480 +Zm00001eb024440 +Zm00001eb255760 +Zm00001eb383670 +Zm00001eb158950 +Zm00001eb400470 +Zm00001eb214780 +Zm00001eb285340 +Zm00001eb258740 +Zm00001eb008150 +Zm00001eb033130 +Zm00001eb165630 +Zm00001eb406990 +Zm00001eb239520 +Zm00001eb147910 +Zm00001eb072690 
+Zm00001eb178810 +Zm00001eb423890 +Zm00001eb021720 +Zm00001eb331830 +Zm00001eb431420 +Zm00001eb419980 +Zm00001eb375030 +Zm00001eb171510 +Zm00001eb132840 +Zm00001eb141830 +Zm00001eb284330 +Zm00001eb303620 +Zm00001eb041840 +Zm00001eb127150 +Zm00001eb288870 +Zm00001eb398950 +Zm00001eb370040 +Zm00001eb179690 +Zm00001eb042870 +Zm00001eb429270 +Zm00001eb002270 +Zm00001eb338290 +Zm00001eb391140 +Zm00001eb347630 +Zm00001eb286770 +Zm00001eb255120 +Zm00001eb246000 +Zm00001eb106250 +Zm00001eb036580 +Zm00001eb328350 +Zm00001eb137030 +Zm00001eb116200 +Zm00001eb323360 +Zm00001eb097930 +Zm00001eb383460 +Zm00001eb076680 +Zm00001eb384900 +Zm00001eb330910 +Zm00001eb138600 +Zm00001eb241920 +Zm00001eb254280 +Zm00001eb335970 +Zm00001eb289580 +Zm00001eb224490 +Zm00001eb073810 +Zm00001eb011430 +Zm00001eb077350 +Zm00001eb159960 +Zm00001eb222040 +Zm00001eb189060 +Zm00001eb367140 +Zm00001eb151750 +Zm00001eb165550 +Zm00001eb434360 +Zm00001eb133100 +Zm00001eb271880 +Zm00001eb379240 +Zm00001eb231160 +Zm00001eb408900 +Zm00001eb138370 +Zm00001eb000760 +Zm00001eb422370 +Zm00001eb049140 +Zm00001eb170050 +Zm00001eb161090 +Zm00001eb381210 +Zm00001eb294040 +Zm00001eb100570 +Zm00001eb127510 +Zm00001eb338800 +Zm00001eb432540 +Zm00001eb352470 +Zm00001eb369000 +Zm00001eb016620 +Zm00001eb165460 +Zm00001eb147860 +Zm00001eb286460 +Zm00001eb057290 +Zm00001eb394380 +Zm00001eb140540 +Zm00001eb379140 +Zm00001eb399590 +Zm00001eb079790 +Zm00001eb196640 +Zm00001eb087880 +Zm00001eb069530 +Zm00001eb099710 +Zm00001eb346980 +Zm00001eb009090 +Zm00001eb099410 +Zm00001eb069670 +Zm00001eb168220 +Zm00001eb040890 +Zm00001eb019440 +Zm00001eb372180 +Zm00001eb036940 +Zm00001eb080230 +Zm00001eb213670 +Zm00001eb066480 +Zm00001eb157820 +Zm00001eb342850 +Zm00001eb203230 +Zm00001eb142990 +Zm00001eb358080 +Zm00001eb058940 +Zm00001eb115750 +Zm00001eb040900 +Zm00001eb242320 +Zm00001eb304610 +Zm00001eb355890 +Zm00001eb244890 +Zm00001eb297540 +Zm00001eb377540 +Zm00001eb258010 +Zm00001eb118930 +Zm00001eb296840 +Zm00001eb206040 
+Zm00001eb418150 +Zm00001eb063530 +Zm00001eb299990 +Zm00001eb002380 +Zm00001eb146310 +Zm00001eb005830 +Zm00001eb170590 +Zm00001eb411380 +Zm00001eb349120 +Zm00001eb419770 +Zm00001eb330710 +Zm00001eb408020 +Zm00001eb397750 +Zm00001eb063910 +Zm00001eb147550 +Zm00001eb195460 +Zm00001eb231900 +Zm00001eb030930 +Zm00001eb176220 diff --git a/example/inputs/gene_set_files/UP_gene_set.txt b/example/inputs/gene_set_files/UP_gene_set.txt new file mode 100644 index 0000000..f8e1363 --- /dev/null +++ b/example/inputs/gene_set_files/UP_gene_set.txt @@ -0,0 +1,568 @@ +Zm00001eb294870 +Zm00001eb258830 +Zm00001eb426280 +Zm00001eb176940 +Zm00001eb090970 +Zm00001eb192710 +Zm00001eb369590 +Zm00001eb013680 +Zm00001eb168570 +Zm00001eb037890 +Zm00001eb329520 +Zm00001eb366870 +Zm00001eb182650 +Zm00001eb360940 +Zm00001eb356390 +Zm00001eb124270 +Zm00001eb343720 +Zm00001eb326900 +Zm00001eb311050 +Zm00001eb316960 +Zm00001eb339970 +Zm00001eb183190 +Zm00001eb250540 +Zm00001eb112930 +Zm00001eb037600 +Zm00001eb005990 +Zm00001eb306790 +Zm00001eb353760 +Zm00001eb338460 +Zm00001eb281360 +Zm00001eb414350 +Zm00001eb042050 +Zm00001eb230820 +Zm00001eb218680 +Zm00001eb012160 +Zm00001eb027690 +Zm00001eb037690 +Zm00001eb286920 +Zm00001eb245620 +Zm00001eb361750 +Zm00001eb239380 +Zm00001eb351430 +Zm00001eb319700 +Zm00001eb054990 +Zm00001eb107050 +Zm00001eb324070 +Zm00001eb123910 +Zm00001eb378280 +Zm00001eb344210 +Zm00001eb104610 +Zm00001eb012930 +Zm00001eb254100 +Zm00001eb050850 +Zm00001eb004470 +Zm00001eb365690 +Zm00001eb013450 +Zm00001eb124800 +Zm00001eb384670 +Zm00001eb112820 +Zm00001eb379510 +Zm00001eb223860 +Zm00001eb006000 +Zm00001eb177420 +Zm00001eb139870 +Zm00001eb061880 +Zm00001eb317110 +Zm00001eb256460 +Zm00001eb145730 +Zm00001eb283510 +Zm00001eb398250 +Zm00001eb199500 +Zm00001eb409230 +Zm00001eb124960 +Zm00001eb190160 +Zm00001eb062170 +Zm00001eb001280 +Zm00001eb067040 +Zm00001eb051810 +Zm00001eb161780 +Zm00001eb366790 +Zm00001eb088070 +Zm00001eb084820 +Zm00001eb085460 +Zm00001eb338370 
+Zm00001eb117180 +Zm00001eb395600 +Zm00001eb336190 +Zm00001eb062580 +Zm00001eb413930 +Zm00001eb115060 +Zm00001eb353580 +Zm00001eb021550 +Zm00001eb038070 +Zm00001eb079720 +Zm00001eb377120 +Zm00001eb296590 +Zm00001eb269420 +Zm00001eb278120 +Zm00001eb018570 +Zm00001eb215860 +Zm00001eb422690 +Zm00001eb364310 +Zm00001eb356420 +Zm00001eb393150 +Zm00001eb151950 +Zm00001eb399340 +Zm00001eb070460 +Zm00001eb147990 +Zm00001eb314060 +Zm00001eb411460 +Zm00001eb394050 +Zm00001eb396990 +Zm00001eb312900 +Zm00001eb326170 +Zm00001eb013150 +Zm00001eb352370 +Zm00001eb284750 +Zm00001eb001220 +Zm00001eb340450 +Zm00001eb012040 +Zm00001eb238010 +Zm00001eb079190 +Zm00001eb056510 +Zm00001eb249190 +Zm00001eb374090 +Zm00001eb113360 +Zm00001eb418610 +Zm00001eb302300 +Zm00001eb300240 +Zm00001eb117590 +Zm00001eb078730 +Zm00001eb107810 +Zm00001eb076550 +Zm00001eb160080 +Zm00001eb192910 +Zm00001eb330160 +Zm00001eb216950 +Zm00001eb168510 +Zm00001eb198620 +Zm00001eb382400 +Zm00001eb004630 +Zm00001eb303190 +Zm00001eb408530 +Zm00001eb149720 +Zm00001eb015550 +Zm00001eb214980 +Zm00001eb090910 +Zm00001eb122980 +Zm00001eb201130 +Zm00001eb366540 +Zm00001eb301590 +Zm00001eb018180 +Zm00001eb328100 +Zm00001eb158070 +Zm00001eb011900 +Zm00001eb123630 +Zm00001eb353170 +Zm00001eb130200 +Zm00001eb397930 +Zm00001eb419870 +Zm00001eb255110 +Zm00001eb400020 +Zm00001eb150050 +Zm00001eb327580 +Zm00001eb079740 +Zm00001eb148130 +Zm00001eb125590 +Zm00001eb124290 +Zm00001eb155620 +Zm00001eb392300 +Zm00001eb432150 +Zm00001eb260130 +Zm00001eb294160 +Zm00001eb089470 +Zm00001eb304050 +Zm00001eb088150 +Zm00001eb392910 +Zm00001eb052470 +Zm00001eb303770 +Zm00001eb281500 +Zm00001eb197000 +Zm00001eb293570 +Zm00001eb155720 +Zm00001eb158020 +Zm00001eb309920 +Zm00001eb314120 +Zm00001eb056010 +Zm00001eb068440 +Zm00001eb220700 +Zm00001eb175950 +Zm00001eb354150 +Zm00001eb030650 +Zm00001eb253850 +Zm00001eb124950 +Zm00001eb021990 +Zm00001eb149740 +Zm00001eb406620 +Zm00001eb295650 +Zm00001eb353870 +Zm00001eb431490 +Zm00001eb021600 
+Zm00001eb075230 +Zm00001eb090060 +Zm00001eb282870 +Zm00001eb055260 +Zm00001eb242800 +Zm00001eb152570 +Zm00001eb061250 +Zm00001eb314180 +Zm00001eb157860 +Zm00001eb292950 +Zm00001eb123780 +Zm00001eb222250 +Zm00001eb101100 +Zm00001eb235710 +Zm00001eb238160 +Zm00001eb387370 +Zm00001eb299240 +Zm00001eb002590 +Zm00001eb252850 +Zm00001eb238560 +Zm00001eb314670 +Zm00001eb124830 +Zm00001eb289500 +Zm00001eb011930 +Zm00001eb293480 +Zm00001eb255510 +Zm00001eb357020 +Zm00001eb308670 +Zm00001eb328020 +Zm00001eb270810 +Zm00001eb409100 +Zm00001eb089630 +Zm00001eb367840 +Zm00001eb176720 +Zm00001eb395390 +Zm00001eb406490 +Zm00001eb141210 +Zm00001eb285170 +Zm00001eb158650 +Zm00001eb420610 +Zm00001eb395500 +Zm00001eb199060 +Zm00001eb014210 +Zm00001eb214110 +Zm00001eb312110 +Zm00001eb043620 +Zm00001eb311590 +Zm00001eb281410 +Zm00001eb337450 +Zm00001eb178990 +Zm00001eb044860 +Zm00001eb013430 +Zm00001eb146690 +Zm00001eb199490 +Zm00001eb058700 +Zm00001eb247320 +Zm00001eb148420 +Zm00001eb371140 +Zm00001eb156150 +Zm00001eb248930 +Zm00001eb057350 +Zm00001eb165430 +Zm00001eb371530 +Zm00001eb360800 +Zm00001eb075660 +Zm00001eb019580 +Zm00001eb225200 +Zm00001eb340440 +Zm00001eb081770 +Zm00001eb050500 +Zm00001eb399220 +Zm00001eb188520 +Zm00001eb325340 +Zm00001eb287980 +Zm00001eb296480 +Zm00001eb188510 +Zm00001eb158640 +Zm00001eb083620 +Zm00001eb033200 +Zm00001eb103010 +Zm00001eb366860 +Zm00001eb110050 +Zm00001eb252530 +Zm00001eb255970 +Zm00001eb187010 +Zm00001eb235800 +Zm00001eb361860 +Zm00001eb257080 +Zm00001eb322190 +Zm00001eb250120 +Zm00001eb022390 +Zm00001eb201210 +Zm00001eb314230 +Zm00001eb322660 +Zm00001eb277280 +Zm00001eb160470 +Zm00001eb033790 +Zm00001eb220740 +Zm00001eb399360 +Zm00001eb343430 +Zm00001eb344260 +Zm00001eb016250 +Zm00001eb362210 +Zm00001eb095580 +Zm00001eb147950 +Zm00001eb044080 +Zm00001eb061930 +Zm00001eb232730 +Zm00001eb424670 +Zm00001eb073550 +Zm00001eb335060 +Zm00001eb122360 +Zm00001eb027960 +Zm00001eb348760 +Zm00001eb141500 +Zm00001eb328450 +Zm00001eb237960 
+Zm00001eb278460 +Zm00001eb330590 +Zm00001eb288360 +Zm00001eb290550 +Zm00001eb161610 +Zm00001eb016290 +Zm00001eb215030 +Zm00001eb357040 +Zm00001eb289050 +Zm00001eb388760 +Zm00001eb408850 +Zm00001eb073880 +Zm00001eb187570 +Zm00001eb273340 +Zm00001eb062560 +Zm00001eb402590 +Zm00001eb405590 +Zm00001eb271420 +Zm00001eb154350 +Zm00001eb427650 +Zm00001eb289950 +Zm00001eb200500 +Zm00001eb279710 +Zm00001eb048780 +Zm00001eb042240 +Zm00001eb299280 +Zm00001eb335430 +Zm00001eb011880 +Zm00001eb294200 +Zm00001eb339080 +Zm00001eb300060 +Zm00001eb060700 +Zm00001eb413290 +Zm00001eb126900 +Zm00001eb395360 +Zm00001eb338000 +Zm00001eb325330 +Zm00001eb231720 +Zm00001eb229460 +Zm00001eb233300 +Zm00001eb415520 +Zm00001eb124380 +Zm00001eb021270 +Zm00001eb259960 +Zm00001eb125820 +Zm00001eb166240 +Zm00001eb192200 +Zm00001eb015990 +Zm00001eb146650 +Zm00001eb307340 +Zm00001eb058460 +Zm00001eb367730 +Zm00001eb058820 +Zm00001eb232620 +Zm00001eb075250 +Zm00001eb161560 +Zm00001eb158500 +Zm00001eb159180 +Zm00001eb090980 +Zm00001eb308610 +Zm00001eb054730 +Zm00001eb354860 +Zm00001eb352760 +Zm00001eb091920 +Zm00001eb371810 +Zm00001eb356980 +Zm00001eb017830 +Zm00001eb179280 +Zm00001eb417550 +Zm00001eb200610 +Zm00001eb003650 +Zm00001eb407710 +Zm00001eb113920 +Zm00001eb103480 +Zm00001eb015770 +Zm00001eb421070 +Zm00001eb230220 +Zm00001eb018720 +Zm00001eb277370 +Zm00001eb038270 +Zm00001eb088790 +Zm00001eb155430 +Zm00001eb172970 +Zm00001eb124940 +Zm00001eb403700 +Zm00001eb306010 +Zm00001eb018700 +Zm00001eb129480 +Zm00001eb050470 +Zm00001eb312160 +Zm00001eb048490 +Zm00001eb226470 +Zm00001eb163520 +Zm00001eb098220 +Zm00001eb009540 +Zm00001eb206560 +Zm00001eb293780 +Zm00001eb336760 +Zm00001eb350120 +Zm00001eb322670 +Zm00001eb414650 +Zm00001eb368900 +Zm00001eb313950 +Zm00001eb423470 +Zm00001eb223680 +Zm00001eb242950 +Zm00001eb340190 +Zm00001eb198790 +Zm00001eb121580 +Zm00001eb187340 +Zm00001eb257130 +Zm00001eb219770 +Zm00001eb290330 +Zm00001eb363910 +Zm00001eb296340 +Zm00001eb136760 +Zm00001eb319020 
+Zm00001eb430440 +Zm00001eb130650 +Zm00001eb106950 +Zm00001eb220880 +Zm00001eb196180 +Zm00001eb414340 +Zm00001eb113490 +Zm00001eb100770 +Zm00001eb016850 +Zm00001eb020260 +Zm00001eb106850 +Zm00001eb241800 +Zm00001eb121900 +Zm00001eb125050 +Zm00001eb104710 +Zm00001eb034050 +Zm00001eb212120 +Zm00001eb105870 +Zm00001eb156130 +Zm00001eb310220 +Zm00001eb247750 +Zm00001eb371680 +Zm00001eb057010 +Zm00001eb099440 +Zm00001eb397730 +Zm00001eb357000 +Zm00001eb117480 +Zm00001eb159000 +Zm00001eb180830 +Zm00001eb083180 +Zm00001eb123380 +Zm00001eb368000 +Zm00001eb086870 +Zm00001eb425600 +Zm00001eb224260 +Zm00001eb214250 +Zm00001eb041390 +Zm00001eb228230 +Zm00001eb121990 +Zm00001eb340000 +Zm00001eb215900 +Zm00001eb223620 +Zm00001eb131140 +Zm00001eb283860 +Zm00001eb015620 +Zm00001eb017280 +Zm00001eb279740 +Zm00001eb384760 +Zm00001eb203130 +Zm00001eb015980 +Zm00001eb129420 +Zm00001eb314880 +Zm00001eb009340 +Zm00001eb224230 +Zm00001eb337410 +Zm00001eb313980 +Zm00001eb220250 +Zm00001eb154450 +Zm00001eb359460 +Zm00001eb285360 +Zm00001eb005250 +Zm00001eb250070 +Zm00001eb291540 +Zm00001eb266820 +Zm00001eb337270 +Zm00001eb409910 +Zm00001eb424620 +Zm00001eb352470 +Zm00001eb400610 +Zm00001eb033580 +Zm00001eb351790 +Zm00001eb314810 +Zm00001eb222340 +Zm00001eb338980 +Zm00001eb400620 +Zm00001eb367110 +Zm00001eb251230 +Zm00001eb121240 +Zm00001eb169870 +Zm00001eb245030 +Zm00001eb152450 +Zm00001eb138530 +Zm00001eb274690 +Zm00001eb119440 +Zm00001eb101050 +Zm00001eb083610 +Zm00001eb116230 +Zm00001eb337970 +Zm00001eb163240 +Zm00001eb018960 +Zm00001eb394150 +Zm00001eb442940 +Zm00001eb411070 +Zm00001eb288700 +Zm00001eb357950 +Zm00001eb059930 +Zm00001eb294480 +Zm00001eb041000 +Zm00001eb200520 +Zm00001eb193860 +Zm00001eb086700 +Zm00001eb296430 +Zm00001eb148650 +Zm00001eb261920 +Zm00001eb189370 +Zm00001eb222960 +Zm00001eb012590 +Zm00001eb397600 +Zm00001eb419390 +Zm00001eb154070 +Zm00001eb279120 +Zm00001eb157170 +Zm00001eb201180 +Zm00001eb367740 +Zm00001eb245230 +Zm00001eb299000 +Zm00001eb116960 
+Zm00001eb401450 +Zm00001eb060680 +Zm00001eb213180 +Zm00001eb055430 +Zm00001eb173640 +Zm00001eb414530 +Zm00001eb159030 +Zm00001eb015400 +Zm00001eb249520 +Zm00001eb374870 +Zm00001eb041030 +Zm00001eb003240 +Zm00001eb003440 +Zm00001eb320650 +Zm00001eb294880 +Zm00001eb006690 diff --git a/example/outputs_icres/.gitkeep b/example/outputs_icres/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/mini_ac.config b/mini_ac.config index 1d2cdad..1ea82db 100644 --- a/mini_ac.config +++ b/mini_ac.config @@ -38,7 +38,7 @@ executor { name = 'local' } -/// WHEN USING HPC SGE UNCOMMENT THE CODE BELOW +// // WHEN USING HPC SGE UNCOMMENT THE CODE BELOW // executor { // name = 'sge' diff --git a/mini_ac.nf b/mini_ac.nf index 4f81ca6..4d5e1e1 100755 --- a/mini_ac.nf +++ b/mini_ac.nf @@ -1,5 +1,4 @@ nextflow.enable.dsl = 2 - include { genome_wide_miniac } from './workflows/miniac_gw' include { locus_based_miniac } from './workflows/miniac_lb' @@ -32,6 +31,7 @@ workflow MINIAC { params.Feature_file = "$projectDir/data/${species}/${species}_go_gene_file.txt" params.TF_fam_file = "$projectDir/data/${species}/${species}_TF_family_file.txt" params.Genes_metadata = "$projectDir/data/${species}/${species}_gene_metadata_file.txt" + ACR_files = Channel.fromPath("${params.ACR_dir}/*.bed").ifEmpty { error "No *.bed files could be found in the specified ACR directory ${params.ACR_dir}" } if (params.mode == "genome_wide") { @@ -41,7 +41,7 @@ workflow MINIAC { params.P_val = 0.1 - genome_wide_miniac(params) + genome_wide_miniac(params, ACR_files) } else if (params.mode == "locus_based") { @@ -50,7 +50,7 @@ workflow MINIAC { params.P_val = 0.01 - locus_based_miniac(params) + locus_based_miniac(params, ACR_files) } else { exit 1, "MINI-AC can only be run using the modes 'genome_wide' or 'locus_based'. Instead it got '${params.mode}'." 
diff --git a/mini_ac_icres.config b/mini_ac_icres.config new file mode 100644 index 0000000..322a2c4 --- /dev/null +++ b/mini_ac_icres.config @@ -0,0 +1,110 @@ +process.container = "vibpsb/mini-ac:latest" +singularity { + enabled = true + cacheDir = "singularity_cache" + autoMounts = true +} + +workDir = './work' + +params { + + //// Output folder + OutDir = "$projectDir/example/outputs_icres" + + //// Required input + Gene_list_dir = "$projectDir/example/inputs/gene_set_files" + + //// Optional input + // Differential expression data + DE_genes = false + DE_genes_dir = "$projectDir/example/inputs/de_files" + One_DE_set = true + // Expression data + Filter_set_genes = false + Set_genes_dir = "$projectDir/example/inputs/exp_genes_files" + One_filtering_set = true + + //// Prediction parameters + Bps_intersect = false + + + //// Prediction parameters only genome-wide + Second_gene_annot = false + Second_gene_dist = 500 +} + +executor { + name = 'local' +} + +// // WHEN USING HPC SGE UNCOMMENT THE CODE BELOW + +// executor { +// name = 'sge' +// queueSize = 25 +// } + + +// process { +// withName: GENE_LIST_TO_ICRES_BED { +// clusterOptions = '-l h_vmem=4G' +// } +// withName: get_ACR_shufflings { +// clusterOptions = '-l h_vmem=4G' +// } +// withName: getStats { +// clusterOptions = '-l h_vmem=10G' +// } +// withName: getStats_bps { +// clusterOptions = '-l h_vmem=50G' +// } +// withName: getNetwork { +// clusterOptions = '-l h_vmem=20G' +// } +// withName: filterSetOfGenes { +// clusterOptions = '-l h_vmem=5G' +// } +// withName: GOenrichment { +// clusterOptions = '-l h_vmem=5G' +// } +// withName: getIntegrativeOutputs { +// clusterOptions = '-l h_vmem=3G' +// } +// } + + +// // WHEN USING HPC SLURM UNCOMMENT THE CODE BELOW + +// executor { +// name = 'slurm' +// queueSize = 25 +// } + + +// process { +// withName: GENE_LIST_TO_ICRES_BED { +// clusterOptions = '--mem 4G' +// } +// withName: get_ACR_shufflings { +// clusterOptions = '--mem 4G' +// } +// withName: 
getStats { +// clusterOptions = '--mem 10G' +// } +// withName: getStats_bps { +// clusterOptions = '--mem 50G' +// } +// withName: getNetwork { +// clusterOptions = '--mem 20G' +// } +// withName: filterSetOfGenes { +// clusterOptions = '--mem 5G' +// } +// withName: GOenrichment { +// clusterOptions = '--mem 5G' +// } +// withName: getIntegrativeOutputs { +// clusterOptions = '--mem 3G' +// } +// } diff --git a/mini_ac_icres.nf b/mini_ac_icres.nf new file mode 100644 index 0000000..d44941d --- /dev/null +++ b/mini_ac_icres.nf @@ -0,0 +1,66 @@ +nextflow.enable.dsl = 2 +include { genome_wide_miniac } from './workflows/miniac_gw' +include { GENE_LIST_TO_ICRES_BED } from './modules/gene_list_to_iCREs_BED/main.nf' + +workflow MINIAC_ICRES { + + params.OBO_file = "$projectDir/data/ontologies/go.obo" + params.Shuffle_count = 1000 + params.Shuffle_seed = -1 + params.Csv_output = false + + // define species id used for data subfolder and data file prefix + def species + switch(params.species) { + case "maize_v4": + species = "zma_v4" + break + case "maize_v5": + species = "zma_v5" + break + default: + exit 1, "MINI-AC iCREs can only be run for the species 'maize_v4' and 'maize_v5' (not Arabidopsis). Instead it got '${params.species}'." + } + + // define iCREs set used for iCREs coordinates file retrieval + def icres_set + switch(params.icres_set) { + case "all": + icres_set = "all_icres" + break + case "maxf1": + icres_set = "maxf1_icres" + break + default: + exit 1, "MINI-AC iCREs can only be run for two sets: all and maxf1. Instead it got '${params.icres_set}'." 
+ } + + // set input data parameters for the execution of MINI-AC genome-wide mode + params.Faix_file = "$projectDir/data/${species}/${species}.fasta.fai" + params.Motif_tf_file = "$projectDir/data/${species}/${species}_motif_TF_file.txt" + params.Feature_file = "$projectDir/data/${species}/${species}_go_gene_file.txt" + params.TF_fam_file = "$projectDir/data/${species}/${species}_TF_family_file.txt" + params.Genes_metadata = "$projectDir/data/${species}/${species}_gene_metadata_file.txt" + + params.icres_set_file = "$projectDir/data/icres/${icres_set}_${species}.bed" + + Gene_sets = Channel.fromPath("${params.Gene_list_dir}/*.txt").ifEmpty { error "No *.txt files could be found in the specified gene sets directory ${params.Gene_list_dir}" } + + script_get_icres_bed = "${projectDir}/bin/geneList2iCREs.py" + + + ACR_files = GENE_LIST_TO_ICRES_BED(params.icres_set_file, Gene_sets, script_get_icres_bed) + + params.MotMapsFile = "$projectDir/data/${species}/${species}_genome_wide_motif_mappings.bed" + params.Non_cod_genome = "$projectDir/data/${species}/${species}_noncod_merged.bed" + params.Genes_coords = "$projectDir/data/${species}/${species}_genes_coords_sorted.bed" + + params.P_val = 0.1 + + genome_wide_miniac(params, ACR_files) + +} + +workflow { + MINIAC_ICRES() +} \ No newline at end of file diff --git a/modules/gene_list_to_iCREs_BED/main.nf b/modules/gene_list_to_iCREs_BED/main.nf new file mode 100644 index 0000000..4a8d91d --- /dev/null +++ b/modules/gene_list_to_iCREs_BED/main.nf @@ -0,0 +1,20 @@ +process GENE_LIST_TO_ICRES_BED { + tag "${gene_list.baseName}" + + + input: + path annotated_icres + path gene_list + path script_geneslist2bed + + output: + path "${gene_list.baseName}_icres.bed" + + script: + + """ + python3 $script_geneslist2bed $annotated_icres $gene_list ${gene_list.baseName}_icres.bed + + """ + +} \ No newline at end of file diff --git a/workflows/miniac_gw.nf b/workflows/miniac_gw.nf index 7560a00..06d39a3 100755 --- 
a/workflows/miniac_gw.nf +++ b/workflows/miniac_gw.nf @@ -263,13 +263,12 @@ process getIntegrativeOutputs { workflow genome_wide_miniac { take: params + ACR_files main: if (!file(params.MotMapsFile).exists()) { error "Please make sure that you downloaded the motif mapping files as described in the documentation." } - ACR_files = Channel.fromPath("${params.ACR_dir}/*.bed").ifEmpty { error "No *.bed files could be found in the specified ACR directory ${params.ACR_dir}" } - get_ACR_shufflings(ACR_files, params.Faix_file, params.Non_cod_genome, params.Shuffle_count, params.Shuffle_seed) acr_shufflings_ch = get_ACR_shufflings.out.shufflings diff --git a/workflows/miniac_lb.nf b/workflows/miniac_lb.nf index e84fd43..9befac3 100755 --- a/workflows/miniac_lb.nf +++ b/workflows/miniac_lb.nf @@ -269,12 +269,11 @@ process getIntegrativeOutputs { workflow locus_based_miniac { take: params + ACR_files main: if (!file(params.MotMapsFile).exists()) { error "Please make sure that you downloaded the motif mapping files as described in the documentation." } - - ACR_files = Channel.fromPath("${params.ACR_dir}/*.bed").ifEmpty { error "No *.bed files could be found in the specified ACR directory ${params.ACR_dir}" } get_ACR_shufflings(ACR_files, params.Faix_file, params.Promoter_file, params.Shuffle_count, params.Shuffle_seed)