From a4b0948fe721492f9e61f82d4c7b97b351a65645 Mon Sep 17 00:00:00 2001 From: Ben Solomon Date: Mon, 9 Sep 2024 04:41:30 -0700 Subject: [PATCH] Add data, methods, and descriptions for Atchley factors (#5) * Add data, methods, and descriptions for Atchley factors * Add Atchley factors to `Peptide` descriptors index * Fix outdated doctest in `Peptide.descriptors` --------- Co-authored-by: Martin Larralde --- README.md | 2 +- docs/api/descriptors.rst | 2 ++ docs/api/index.rst | 1 + docs/index.rst | 1 + peptides/__init__.py | 57 ++++++++++++++++++++++++++++++++- peptides/tables/atchley/AF1.csv | 20 ++++++++++++ peptides/tables/atchley/AF2.csv | 20 ++++++++++++ peptides/tables/atchley/AF3.csv | 20 ++++++++++++ peptides/tables/atchley/AF4.csv | 20 ++++++++++++ peptides/tables/atchley/AF5.csv | 20 ++++++++++++ 10 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 peptides/tables/atchley/AF1.csv create mode 100644 peptides/tables/atchley/AF2.csv create mode 100644 peptides/tables/atchley/AF3.csv create mode 100644 peptides/tables/atchley/AF4.csv create mode 100644 peptides/tables/atchley/AF5.csv diff --git a/README.md b/README.md index 4fbd4ead..e0fc45dd 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ versions (3.6+). A non-exhaustive list of available features: - Peptide statistics: amino acid counts and frequencies. -- **QSAR** descriptors: BLOSUM indices, Cruciani properties, FASGAI vectors, Kidera factors, MS-WHIM scores, PCP descriptors, ProtFP descriptors, Sneath vectors, ST-scales, T-scales, VHSE-scales, Z-scales. +- **QSAR** descriptors: BLOSUM indices, Cruciani properties, FASGAI vectors, Kidera factors, Atchley factors, MS-WHIM scores, PCP descriptors, ProtFP descriptors, Sneath vectors, ST-scales, T-scales, VHSE-scales, Z-scales. - Sequence profiles: hydrophobicity, hydrophobic moment, membrane position. 
- Physicochemical properties: aliphatic index, instability index, theoretical net charge, isoelectric point, molecular weight (with isotope labelling support). - Biological properties: structural class prediction. diff --git a/docs/api/descriptors.rst b/docs/api/descriptors.rst index e2e4bd9d..28dab75a 100644 --- a/docs/api/descriptors.rst +++ b/docs/api/descriptors.rst @@ -11,6 +11,8 @@ Descriptors .. autoclass:: peptides.KideraFactors(typing.NamedTuple) +.. autoclass:: peptides.AtchleyFactors(typing.NamedTuple) + .. autoclass:: peptides.MSWHIMScores(typing.NamedTuple) .. autoclass:: peptides.PhysicalDescriptors(typing.NamedTuple) diff --git a/docs/api/index.rst b/docs/api/index.rst index 4643a91f..0990c21e 100644 --- a/docs/api/index.rst +++ b/docs/api/index.rst @@ -27,6 +27,7 @@ Descriptors CrucianiProperties FasgaiVectors KideraFactors + AtchleyFactors MSWHIMScores PhysicalDescriptors PCPDescriptors diff --git a/docs/index.rst b/docs/index.rst index 9b355fe8..0a8f6406 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -81,6 +81,7 @@ A non-exhaustive list of available features: - `Cruciani properties `_ - `FASGAI vectors `_ - `Kidera factors `_ + - `Atchley factors `_ - `MS-WHIM scores `_ - `PCP descriptors `_ - `ProtFP descriptors `_ diff --git a/peptides/__init__.py b/peptides/__init__.py index 7fdd3d97..42a36d12 100644 --- a/peptides/__init__.py +++ b/peptides/__init__.py @@ -144,6 +144,34 @@ class KideraFactors(typing.NamedTuple): kf10: float +class AtchleyFactors(typing.NamedTuple): + """The Atchley factors of a peptide. + + The Atchley Factors were originally derived by applying multivariate + analysis to 494 physical properties of the 20 amino acids and using + dimension reduction techniques. + + Attributes: + af1 (`float`): A factor modeling polarity, accessibility, and hydrophobicity. + af2 (`float`): A factor modeling propensity for secondary structure. + af3 (`float`): A factor modeling molecular size. 
+ af4 (`float`): A factor modeling codon composition. + af5 (`float`): A factor modeling electrostatic charge. + + References: + - Atchley, W. R., Zhao, J., Fernandes, A. D., Drüke, T. + *Solving the protein sequence metric problem*. + Proceedings of the National Academy of Sciences. + Apr 2005;102(18):6395-6400. :doi:`10.1073/pnas.0408677102`. + + """ + af1: float + af2: float + af3: float + af4: float + af5: float + + class MSWHIMScores(typing.NamedTuple): """The MS-WHIM scores of a peptide. @@ -648,7 +676,7 @@ def descriptors(self) -> typing.Dict[str, float]: Example: >>> peptide = Peptide("SDKEVDEVDAALSDLEITLE") >>> sorted(peptide.descriptors().keys()) - ['BLOSUM1', ..., 'F1', ..., 'KF1', ..., 'MSWHIM1', ..., 'PP1', ...] + ['AF1', ..., 'F1', ..., 'KF1', ..., 'MSWHIM1', ..., 'PP1', ...] Hint: Use this method to create a `~pandas.DataFrame` containing the @@ -2115,6 +2143,32 @@ def kidera_factors(self) -> KideraFactors: out.append(_sum(p) / len(self)) return KideraFactors(*out) + def atchley_factors(self) -> AtchleyFactors: + """Compute the Atchley factors of the peptide. + + See `~peptides.AtchleyFactors` for more information. + + Returns: + `peptides.AtchleyFactors`: The computed average Atchley factors + for all the amino acids in the peptide. + + Example: + >>> peptide = Peptide("KLKLLLLLKLK") + >>> for i, kf in enumerate(peptide.atchley_factors()): + ... print(f"AF{i+1:<3} {kf: .4f}") + AF1 0.0176 + AF2 -0.8321 + AF3 -0.7636 + AF4 0.7048 + AF5 0.0189 + + """ + out = [] + for i in range(len(tables.ATCHLEY)): + p = self.profile(tables.ATCHLEY[f"AF{i+1}"]) + out.append(_sum(p) / len(self)) + return AtchleyFactors(*out) + def ms_whim_scores(self) -> MSWHIMScores: """Compute the MS-WHIM scores of the peptide. 
@@ -2407,6 +2461,7 @@ def z_scales(self) -> ZScales: return ZScales(*out) __DESCRIPTORS = { + "AF": atchley_factors, "BLOSUM": blosum_indices, "PP": cruciani_properties, "F": fasgai_vectors, diff --git a/peptides/tables/atchley/AF1.csv b/peptides/tables/atchley/AF1.csv new file mode 100644 index 00000000..d7b141a7 --- /dev/null +++ b/peptides/tables/atchley/AF1.csv @@ -0,0 +1,20 @@ +A,-0.59145974 +R,1.53754853 +N,0.94535614 +D,1.05015062 +C,-1.34267179 +Q,0.93056541 +E,1.35733226 +G,-0.38387987 +H,0.33616543 +I,-1.23936304 +L,-1.01895162 +K,1.83146558 +M,-0.66312569 +F,-1.00610084 +P,0.18862522 +S,-0.22788299 +T,-0.03181782 +W,-0.59533918 +Y,0.25999617 +V,-1.33661279 diff --git a/peptides/tables/atchley/AF2.csv b/peptides/tables/atchley/AF2.csv new file mode 100644 index 00000000..66d43005 --- /dev/null +++ b/peptides/tables/atchley/AF2.csv @@ -0,0 +1,20 @@ +A,-1.30209266 +R,-0.05472897 +N,0.82846219 +D,0.30242411 +C,0.465423 +Q,-0.17926549 +E,-1.45275578 +G,1.65201497 +H,-0.4166278 +I,-0.54652238 +L,-0.98693471 +K,-0.56109831 +M,-1.52353917 +F,-0.59046634 +P,2.08084151 +S,1.39869991 +T,0.32571153 +W,0.0090776 +Y,0.82992312 +V,-0.27854634 diff --git a/peptides/tables/atchley/AF3.csv b/peptides/tables/atchley/AF3.csv new file mode 100644 index 00000000..2bf04821 --- /dev/null +++ b/peptides/tables/atchley/AF3.csv @@ -0,0 +1,20 @@ +A,-0.7330651 +R,1.5021086 +N,1.2991286 +D,-3.6559147 +C,-0.8620345 +Q,-3.0048731 +E,1.476661 +G,1.3301017 +H,-1.673369 +I,2.1314349 +L,-1.5046185 +K,0.5332237 +M,2.2194787 +F,1.8909687 +P,-1.6283286 +S,-4.7596375 +T,2.2134612 +W,0.6719274 +Y,3.0973596 +V,-0.5440132 diff --git a/peptides/tables/atchley/AF4.csv b/peptides/tables/atchley/AF4.csv new file mode 100644 index 00000000..7e2d1cb4 --- /dev/null +++ b/peptides/tables/atchley/AF4.csv @@ -0,0 +1,20 @@ +A,1.5703918 +R,0.4403185 +N,-0.1688162 +D,-0.2590236 +C,-1.0200786 +Q,-0.502591 +E,0.1129444 +G,1.0449765 +H,-1.4738898 +I,0.3931618 +L,1.2658296 +K,-0.2771101 +M,-1.0047207 
+F,-0.3966186 +P,0.4207004 +S,0.6701745 +T,0.9078985 +W,-2.1275244 +Y,-0.8380164 +V,1.2419935 diff --git a/peptides/tables/atchley/AF5.csv b/peptides/tables/atchley/AF5.csv new file mode 100644 index 00000000..c23b7e0c --- /dev/null +++ b/peptides/tables/atchley/AF5.csv @@ -0,0 +1,20 @@ +A,-0.14550842 +R,2.89744417 +N,0.93339498 +D,-3.24176791 +C,-0.25516894 +Q,-1.85303476 +E,-0.83715681 +G,2.06385566 +H,-0.07772917 +I,0.81630366 +L,-0.91181195 +K,1.64762794 +M,1.21181214 +F,0.41194139 +P,-1.39177378 +S,-2.64747356 +T,1.31337035 +W,-0.18358096 +Y,1.51150958 +V,-1.26225362