Skip to content

Commit

Permalink
Fix downloads (#233)
Browse files Browse the repository at this point in the history
* fix loaders

* fix download workflow

* finishing touches

* revert version

* improve Ensembl release listing

---------

Co-authored-by: Anthony Cesnik <[email protected]>
  • Loading branch information
acesnik and acesnik authored Aug 12, 2023
1 parent 63c3c3e commit 1c5f44f
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 37 deletions.
2 changes: 1 addition & 1 deletion Spritz/SpritzBackend/EnsemblRelease.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public static ObservableCollection<EnsemblRelease> GetReleases()
foreach (string release in releases)
{
// read txt file into obsv collection
var species = genomeDB.Select(g => g.Split(',')[1]).Distinct().ToList();
var species = genomeDB.Where(g => g.Contains(release)).Select(g => g.Split(',')[1]).Distinct().ToList();
Dictionary<string, string> genomes = new();
Dictionary<string, string> organisms = new();

Expand Down
2 changes: 1 addition & 1 deletion Spritz/SpritzBackend/RunnerEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public class RunnerEngine
public string SnakemakeCommand { get; private set; }
public string SpritzCMDCommand { get; set; }

public static readonly string CurrentVersion = "0.3.8"; // should be the same here, in config.yaml, and in common.smk
public static readonly string CurrentVersion = "0.3.8";
public static readonly bool PrebuiltSpritzMods = true; // always using prebuilt library now
public RunnerEngine(Tuple<string, SpritzOptions> task, string outputFolder)
{
Expand Down
3 changes: 2 additions & 1 deletion Spritz/SpritzModifications/SpritzModifications.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
<PackageReference Include="FluentCommandLineParser" Version="1.4.3">
<NoWarn>$(NoWarn);NU1701</NoWarn>
</PackageReference>
<PackageReference Include="mzLib" Version="1.0.532" />
<PackageReference Include="mzLib" Version="1.0.540" />
<PackageReference Include="TopDownProteomics" Version="0.0.295" />
</ItemGroup>

<ItemGroup>
Expand Down
9 changes: 6 additions & 3 deletions Spritz/SpritzTest/SpritzTest.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.3.1" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.7.0" />
<PackageReference Include="NUnit" Version="3.13.3" />
<PackageReference Include="NUnit3TestAdapter" Version="4.2.1" />
<PackageReference Include="coverlet.collector" Version="3.0.2" />
<PackageReference Include="NUnit3TestAdapter" Version="4.5.0" />
<PackageReference Include="coverlet.collector" Version="6.0.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
</ItemGroup>

<ItemGroup>
Expand Down
18 changes: 5 additions & 13 deletions Spritz/workflow/scripts/download_uniprot.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,20 @@
# see https://www.ebi.ac.uk/training/online/sites/ebi.ac.uk.training.online/files/UniProt_programmatically_py3.pdf

import get_proteome
import requests
import sys

format = sys.argv[1]

proteome = get_proteome.proteome
BASE = 'http://legacy.uniprot.org'
KB_ENDPOINT = '/uniprot/'
TOOL_ENDPOINT = '/uploadlists/'

# query = 'name:"polymerase alpha" AND proteome:UP000005640 AND reviewed:yes'
# query = 'proteome:UP000005640 AND reviewed:yes'
query = 'proteome:' + proteome
BASE_URL = 'https://rest.uniprot.org'
ENDPOINT = '/uniprot/search'

payload = {
'query': query,
'query': proteome,
'format': format,
'include': 'yes', # include isoforms in fasta
# 'columns': 'id,entry_name,reviewed,protein_names,organism,ec,keywords',
'includeIsoforms': 'yes', # include isoforms in fasta
}

result = requests.get(BASE + KB_ENDPOINT, params=payload, stream=True)
result = requests.get(BASE_URL + ENDPOINT, params=payload, stream=True)
result.raise_for_status() # throw an error for bad status code
for block in result.iter_content(1024):
sys.stdout.buffer.write(block)
32 changes: 14 additions & 18 deletions Spritz/workflow/scripts/get_proteome.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,29 @@
import yaml

# find proteome
BASE = 'http://legacy.uniprot.org'
KB_ENDPOINT = '/proteomes/'
TOOL_ENDPOINT = '/uploadlists/'
BASE_URL = 'https://rest.uniprot.org'
ENDPOINT = '/proteomes/search'

params = {
'query': '*',
'format': 'tsv',
}

with open("config/config.yaml", 'r') as stream:
data = yaml.safe_load(stream)

query = data["species"] # read config
organism = data["organism"].lower()

# special case
if query == 'canis_familiaris':
query = 'canis_lupus_familiaris'

payload = {
'query': query,
'sort': 'score',
'format': 'tab',
}

proteome_res = requests.get(BASE + KB_ENDPOINT, params=payload, stream=True)
proteome_res = requests.get(BASE_URL + ENDPOINT, params=params, stream=True)
proteome_res.raise_for_status() # throw an error for bad status code

results = proteome_res.text.split('\n')[1:]

proteome = None
for r in results:
splt = r.split('\t')
if organism in splt[1].lower():
if organism.replace('_', ' ') in splt[1].lower():
proteome = splt[0]
break

if proteome is None:
print(f"Proteome for organism {organism} not found.")
sys.exit(1)

0 comments on commit 1c5f44f

Please sign in to comment.