RECETOX · hechth · Dec 4, 2023 · Dec 4, 2023 · Dec 4, 2023 · Dec 4, 2023
diff --git a/tools/ipapy2/.shed.yml b/tools/ipapy2/.shed.yml
@@ -0,0 +1,17 @@
+# Tool Shed repository metadata for the ipaPy2 Galaxy tool wrappers.
+name: ipaPy2
+# Tool Shed account that owns the repositories; an empty value parses as null.
+owner: recetox
+remote_repository_url: "https://github.com/RECETOX/galaxytools/tree/master/tools/ipapy2"
+homepage_url: "https://github.com/francescodc87/ipaPy2"
+categories:
+  - Metabolomics
+description: "Mass spectrometry data annotation tool."
+long_description: "New Python implementation of the Integrated Probabilistic Annotation (IPA) - A Bayesian annotation method for LC/MS data integrating biochemical relations, isotope patterns and adduct formation."
+auto_tool_repositories:
+  name_template: "{{ tool_id }}"
+  description_template: "{{ tool_name }} tool from the ipaPy2 package"
+suite:
+  name: suite_ipapy2
+  description: tools from the ipaPy2 suite are used for annotation of mass spectrometry data
+  type: repository_suite_definition
diff --git a/tools/ipapy2/ipapy2_clustering.py b/tools/ipapy2/ipapy2_clustering.py
@@ -0,0 +1,24 @@
+import click
+import pandas as pd
+from ipaPy2 import ipa
+
+
+# Single-purpose script without sub-commands, hence click.command rather than click.group.
+@click.command()
+@click.option('--i', 'input_filename', type=click.Path(exists=True, dir_okay=False), required=True)
+@click.option('--o', 'output_filename', type=click.Path(writable=True, dir_okay=False), required=True)
+def cli(input_filename, output_filename):
+    """Cluster the features of an intensity table and write the result as CSV.
+
+    The table given with --i is read with pandas.read_csv and passed to
+    ipa.clusterFeatures; the returned table is written to the path given
+    with --o.
+    """
+    intensity_table = pd.read_csv(input_filename)
+    result = ipa.clusterFeatures(intensity_table)
+    # index=False keeps the pandas row index out of the output file.
+    result.to_csv(output_filename, index=False)
+
+
+if __name__ == '__main__':
+    cli()
diff --git a/tools/ipapy2/ipapy2_clustering.xml b/tools/ipapy2/ipapy2_clustering.xml
@@ -0,0 +1,60 @@
+<tool id="ipapy2_clustering" name="ipaPy2 clustering" version="@TOOL_VERSION@+galaxy0" python_template_version="3.5" profile="21.05">
-<tool id="ipapy2_clustering" name="ipaPy2 clustering" version="@TOOL_VERSION@+galaxy0" python_template_version="3.5" profile="21.05">
+<tool id="ipapy2_clustering" name="ipaPy2 clustering" version="@TOOL_VERSION@+galaxy0" profile="21.05">
-<tool id="ipapy2_clustering" name="ipaPy2 clustering" version="@TOOL_VERSION@+galaxy0" python_template_version="3.5" profile="21.05">
+<tool id="ipapy2_clustering" name="ipaPy2 clustering" version="@TOOL_VERSION@+galaxy0" profile="21.05">
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">ipapy2</requirement>
+        <requirement type="package" version="8.0.1">click</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        python3 ${__tool_directory__}/ipapy2_clustering.py --i '${intensity_table}' --o '${output}'
-        python3 ${__tool_directory__}/ipapy2_clustering.py --i '${intensity_table}' --o '${output}'
+        python3 '${__tool_directory__}/ipapy2_clustering.py' --i '${intensity_table}' --o '${output}'
-        python3 ${__tool_directory__}/ipapy2_clustering.py --i '${intensity_table}' --o '${output}'
+        python3 '${__tool_directory__}/ipapy2_clustering.py' --i '${intensity_table}' --o '${output}'
+    ]]></command>
+
+    <inputs>
+        <param label="Intensity table" name="intensity_table" type="data" format="csv" help="CSV table with one row per feature: id, mz, rt and one intensity column per sample (at least 3 samples)." />
+    </inputs>
+
+    <outputs>
+        <data label="${tool.name} on ${on_string}" name="output" format="csv"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="intensity_table" value="minimal_input.csv"/>
+            <output name="output" file="clustering.csv"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+    Before using the ipaPy2 package, the processed data coming from an untargeted metabolomics experiment must be properly prepared.
+    The data must be organized in a pandas dataframe containing the following columns:
+
+    - **ids**: a unique numeric id for each mass spectrometry feature
+    - **rel.ids**: relation ids. Features must be clustered based on correlation/peak shape/retention time. Features in the same cluster are likely to come from the same metabolite.
+    - **mzs**: mass-to-charge ratios, usually the average across different samples.
+    - **RTs**: retention times in seconds, usually the average across different samples.
+    - **Int**: representative (e.g., maximum or average) intensity detected for each feature across samples (either peak area or peak intensity)
+
+    The clustering of the features is a necessary step and must be performed before running the IPA method.
+    For this step, the use of widely used data processing software such as mzMatch and CAMERA is recommended.
+    Nevertheless, the ipaPy2 library provides a function (clusterFeatures()) able to perform such step,
+    starting from a dataframe containing the measured intensities across several samples (at least 3 samples, the more samples the better).
+    Such dataframe should be organized as follows:
+
+    +----+------+-----+-------------+-------------+-------------+
+    | id | mz   | rt  | intensity_1 | intensity_2 | intensity_3 |
+    +====+======+=====+=============+=============+=============+
+    | 1  | 100  | 10  | 500         | 600         | 700         |
+    +----+------+-----+-------------+-------------+-------------+
+    | 2  | 200  | 20  | 800         | 900         | 1000        |
+    +----+------+-----+-------------+-------------+-------------+
+    | 3  | 300  | 30  | 1100        | 1200        | 1300        |
+    +----+------+-----+-------------+-------------+-------------+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1021/acs.analchem.9b02354</citation>
+        <citation type="doi">10.1093/bioinformatics/btad455</citation>
+    </citations>
+</tool>
diff --git a/tools/ipapy2/macros.xml b/tools/ipapy2/macros.xml
@@ -0,0 +1,3 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.3.0</token>
+</macros>
diff --git a/tools/ipapy2/test-data/clustering.csv b/tools/ipapy2/test-data/clustering.csv
@@ -0,0 +1,10 @@
+ids,rel.ids,mzs,RTs,Int
+0,0,110.0346328990618,189.90331509802155,3786719.935982465
+1,1,112.0506647763902,163.27735969175174,142356656.91329387
+2,2,112.07565637582633,185.44492145379184,4232563.395784442
+3,3,113.0233876257136,172.1718694530046,814718.2931617
+4,4,113.05983818307936,170.6316635193729,25946536.94788723
+5,5,113.05983777733188,180.8824750038957,14719570.784554532
+6,6,114.03726205384052,184.9377656087088,2277920.5184312323
+7,7,114.06619405738884,166.3394005272794,12877950.34415196
+8,8,114.06619243777511,179.11913090787945,4012725.880463593
diff --git a/tools/ipapy2/test-data/minimal_input.csv b/tools/ipapy2/test-data/minimal_input.csv
@@ -0,0 +1,10 @@
+id,mz,rt,data06_intensity,data07_intensity,data08_intensity
+0,110.03463289906179,189.90331509802155,3786719.935982465,3337130.2371725794
+1,112.05066477639019,163.27735969175174,142356656.91329387,84292123.72052653,116220468.2058125
+2,112.07565637582633,185.44492145379184,3027831.662547251,4232563.395784442
+3,113.02338762571361,172.1718694530046,814718.2931617,667412.6145220067
+4,113.05983818307935,170.6316635193729,9769737.443290893,25946536.94788723,15256613.822737923
+5,113.05983777733188,180.8824750038957,5083952.726032479,14719570.784554532,11882839.70348706
+6,114.03726205384051,184.9377656087088,2277920.5184312323,1942789.2956770866
+7,114.06619405738884,166.3394005272794,4854729.25883474,12877950.34415196,0.0
+8,114.06619243777511,179.11913090787945,1029415.0130157267,0.0,4012725.880463593