Skip to content

Commit

Permalink
Merge pull request #195 from Clinical-Genomics/rc410
Browse files Browse the repository at this point in the history
### Added

- Release candidate microSALT 4.1.0
  • Loading branch information
karlnyr authored Jan 8, 2025
2 parents ac85314 + 124b4bf commit ff3549e
Show file tree
Hide file tree
Showing 16 changed files with 1,037 additions and 772 deletions.
23 changes: 12 additions & 11 deletions configExample.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,21 @@
"project": "production",
"type": "core"
},

"regex": {
"regex": {
"mail_recipient": "[email protected]",
"_comment": "File finding patterns. Only single capture group accepted (for reverse/forward identifier)",
"file_pattern": "\\w{8,12}_\\w{8,10}(?:-\\d+)*_L\\d_(?:R)*(\\d{1}).fastq.gz",
"_comment": "Organisms recognized enough to be considered stable",
"verified_organisms": []
},

"_comment": "Folders",
"folders": {
"folders": {
"_comment": "Root folder for ALL output",
"results": "/tmp/MLST/results/",
"_comment": "Report collection folder",
"reports": "/tmp/MLST/reports/",
"_comment": "Log file position and name",
"log_file": "/tmp/microsalt.log",

"_comment": "Root folder for input fasta sequencing data",
"seqdata": "/tmp/projects/",
"_comment": "ST profiles. Each ST profile file under 'profiles' has an identical folder under references",
Expand All @@ -35,18 +32,18 @@
"_comment": "Resistances. Commonly from resFinder",
"resistances": "/tmp/MLST/references/resistances",
"_comment": "Download path for NCBI genomes, for alignment usage",
"genomes": "/tmp/MLST/references/genomes"
"genomes": "/tmp/MLST/references/genomes",
"_comment": "PubMLST credentials",
"pubmlst_credentials": "/tmp/MLST/credentials"
},

"_comment": "Database/Flask configuration",
"database": {
"SQLALCHEMY_DATABASE_URI": "sqlite:////tmp/microsalt.db",
"SQLALCHEMY_TRACK_MODIFICATIONS": "False",
"DEBUG": "True"
},

"_comment": "Thresholds for Displayed results",
"threshold": {
"threshold": {
"_comment": "Typing thresholds",
"mlst_id": 100,
"mlst_novel_id": 99.5,
Expand All @@ -72,11 +69,15 @@
"bp_50x_warn": 50,
"bp_100x_warn": 20
},

"_comment": "Genologics temporary configuration file",
"genologics": {
"baseuri": "https://lims.facility.se/",
"username": "limsuser",
"password": "mypassword"
},
"_comment": "PubMLST credentials",
"pubmlst": {
"client_id": "",
"client_secret": ""
}
}
}
43 changes: 21 additions & 22 deletions microSALT/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from flask import Flask
from distutils.sysconfig import get_python_lib

__version__ = "4.0.0"
__version__ = "4.1.0"

app = Flask(__name__, template_folder="server/templates")
app.config.setdefault("SQLALCHEMY_DATABASE_URI", "sqlite:///:memory:")
Expand Down Expand Up @@ -47,11 +47,22 @@
# Load flask info
app.config.update(preset_config["database"])

# Add `folders` configuration
app.config["folders"] = preset_config.get("folders", {})

# Ensure PubMLST configuration is included

app.config["pubmlst"] = preset_config.get("pubmlst", {
"client_id": "",
"client_secret": ""
})

app.config["pubmlst"] = preset_config.get("pubmlst", {"client_id": "", "client_secret": ""})


# Add extrapaths to config
preset_config["folders"]["expec"] = os.path.abspath(
os.path.join(
pathlib.Path(__file__).parent.parent, "unique_references/ExPEC.fsa"
)
os.path.join(pathlib.Path(__file__).parent.parent, "unique_references/ExPEC.fsa")
)
# Check if release install exists
for entry in os.listdir(get_python_lib()):
Expand Down Expand Up @@ -104,22 +115,14 @@
):
# Special string, mangling
if thing == "log_file":
unmade_fldr = os.path.dirname(
preset_config[entry][thing]
)
bash_cmd = "touch {}".format(
preset_config[entry][thing]
)
proc = subprocess.Popen(
bash_cmd.split(), stdout=subprocess.PIPE
)
unmade_fldr = os.path.dirname(preset_config[entry][thing])
bash_cmd = "touch {}".format(preset_config[entry][thing])
proc = subprocess.Popen(bash_cmd.split(), stdout=subprocess.PIPE)
output, error = proc.communicate()
elif thing == "SQLALCHEMY_DATABASE_URI":
unmade_fldr = os.path.dirname(db_file)
bash_cmd = "touch {}".format(db_file)
proc = subprocess.Popen(
bash_cmd.split(), stdout=subprocess.PIPE
)
proc = subprocess.Popen(bash_cmd.split(), stdout=subprocess.PIPE)
output, error = proc.communicate()
if proc.returncode != 0:
logger.error(
Expand All @@ -132,12 +135,8 @@
os.makedirs(unmade_fldr)
logger.info("Created path {}".format(unmade_fldr))

fh = logging.FileHandler(
os.path.expanduser(preset_config["folders"]["log_file"])
)
fh.setFormatter(
logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
)
fh = logging.FileHandler(os.path.expanduser(preset_config["folders"]["log_file"]))
fh.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
logger.addHandler(fh)

# Integrity check database
Expand Down
Empty file.
106 changes: 106 additions & 0 deletions microSALT/utils/pubmlst/authentication.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import json
import os
from datetime import datetime, timedelta
from dateutil import parser
from rauth import OAuth1Session
from microSALT import logger
from microSALT.utils.pubmlst.helpers import BASE_API, save_session_token, load_auth_credentials, get_path, folders_config, credentials_path_key, pubmlst_session_credentials_file_name
from microSALT.utils.pubmlst.exceptions import (
PUBMLSTError,
SessionTokenRequestError,
SessionTokenResponseError,
)

# Number of hours added to the current time when computing the expiration
# that is stored alongside a freshly fetched session token.
session_token_validity = 12 # 12-hour validity
# Number of seconds subtracted from the stored expiration before a cached
# session token is considered still usable.
session_expiration_buffer = 60 # 60-second buffer

def get_new_session_token(db: str):
    """Request a new session token using all credentials for a specific database.

    The consumer and access credentials are loaded via
    ``load_auth_credentials()`` and used to sign a GET request against
    ``{BASE_API}/db/{db}/oauth/get_session_token``. On success the token pair
    is persisted through ``save_session_token`` together with an expiration of
    ``session_token_validity`` hours from now.

    Args:
        db: Name of the database the session token is requested for.

    Returns:
        A ``(session_token, session_secret)`` tuple read from the
        ``oauth_token`` and ``oauth_token_secret`` fields of the JSON response.

    Raises:
        SessionTokenRequestError: The server answered with a non-OK status.
        SessionTokenResponseError: The response could not be decoded or lacks
            one of the two token fields.
        PUBMLSTError: Any other failure; the original exception is chained as
            the cause.
        NOTE(review): the two session-token errors are assumed to derive from
        PUBMLSTError (otherwise they are wrapped by the generic handler) --
        confirm against microSALT.utils.pubmlst.exceptions.
    """
    logger.debug(f"Fetching a new session token for database '{db}'...")

    try:
        consumer_key, consumer_secret, access_token, access_secret = load_auth_credentials()

        url = f"{BASE_API}/db/{db}/oauth/get_session_token"

        session = OAuth1Session(
            consumer_key=consumer_key,
            consumer_secret=consumer_secret,
            access_token=access_token,
            access_token_secret=access_secret,
        )

        response = session.get(url, headers={"User-Agent": "BIGSdb downloader"})
        logger.debug(f"Response Status Code: {response.status_code}")

        if response.ok:
            try:
                token_data = response.json()
                session_token = token_data.get("oauth_token")
                session_secret = token_data.get("oauth_token_secret")

                if not session_token or not session_secret:
                    raise SessionTokenResponseError(
                        db, "Missing 'oauth_token' or 'oauth_token_secret' in response."
                    )

                expiration_time = datetime.now() + timedelta(hours=session_token_validity)

                # Persist the token so later calls can reuse it until it expires.
                save_session_token(db, session_token, session_secret, expiration_time)
                return session_token, session_secret

            except (ValueError, KeyError) as e:
                raise SessionTokenResponseError(
                    db, f"Invalid response format: {str(e)}"
                ) from e
        else:
            raise SessionTokenRequestError(
                db, response.status_code, response.text
            )

    except PUBMLSTError as e:
        # Domain errors are logged and passed on unchanged.
        logger.error(f"Error during token fetching: {e}")
        raise
    except Exception as e:
        # Anything else is normalised to PUBMLSTError, keeping the cause attached.
        logger.error(f"Unexpected error: {e}")
        raise PUBMLSTError(
            f"Unexpected error while fetching session token for database '{db}': {e}"
        ) from e

def load_session_credentials(db: str):
    """Load session token from file for a specific database.

    The session file is looked up under the credentials folder resolved by
    ``get_path(folders_config, credentials_path_key)``. A new token is fetched
    through ``get_new_session_token`` when the file is absent, when it holds
    no entry for ``db``, when the entry is incomplete, or when the stored
    expiration (minus ``session_expiration_buffer`` seconds) has passed.

    Args:
        db: Name of the database whose session token is wanted.

    Returns:
        A ``(session_token, session_secret)`` tuple, either read from the
        session file or freshly fetched.

    Raises:
        SessionTokenResponseError: The session file exists but is not valid JSON.
        PUBMLSTError: Any other failure; the original exception is chained as
            the cause.
    """
    try:
        credentials_file = os.path.join(
            get_path(folders_config, credentials_path_key),
            pubmlst_session_credentials_file_name,
        )

        if not os.path.exists(credentials_file):
            logger.debug("Session file does not exist. Fetching a new session token.")
            return get_new_session_token(db)

        with open(credentials_file, "r") as f:
            try:
                all_sessions = json.load(f)
            except json.JSONDecodeError as e:
                raise SessionTokenResponseError(
                    db, f"Failed to parse session file: {str(e)}"
                ) from e

        db_session_data = all_sessions.get("databases", {}).get(db)
        if not db_session_data:
            logger.debug(f"No session token found for database '{db}'. Fetching a new session token.")
            return get_new_session_token(db)

        session_token = db_session_data.get("token")
        session_secret = db_session_data.get("secret")
        expiration_raw = db_session_data.get("expiration")

        # An entry lacking any of its three fields cannot be used: parsing an
        # empty expiration fails and empty tokens cannot sign a request, so
        # treat it like a missing entry instead of erroring or returning None.
        if not (session_token and session_secret and expiration_raw):
            logger.debug(f"Stored session for database '{db}' is incomplete. Fetching a new session token.")
            return get_new_session_token(db)

        expiration = parser.parse(expiration_raw)
        if datetime.now() < expiration - timedelta(seconds=session_expiration_buffer):
            logger.debug(f"Using existing session token for database '{db}'.")
            return session_token, session_secret

        logger.debug(f"Session token for database '{db}' has expired. Fetching a new session token.")
        return get_new_session_token(db)

    except PUBMLSTError as e:
        logger.error(f"PUBMLST-specific error occurred: {e}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        raise PUBMLSTError(
            f"Unexpected error while loading session token for database '{db}': {e}"
        ) from e

116 changes: 116 additions & 0 deletions microSALT/utils/pubmlst/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import requests
from urllib.parse import urlencode
from microSALT.utils.pubmlst.helpers import (
BASE_API,
generate_oauth_header,
load_auth_credentials,
parse_pubmlst_url
)
from microSALT.utils.pubmlst.constants import RequestType, HTTPMethod, ResponseHandler
from microSALT.utils.pubmlst.exceptions import PUBMLSTError, SessionTokenRequestError
from microSALT.utils.pubmlst.authentication import load_session_credentials
from microSALT import logger

class PubMLSTClient:
    """Client for interacting with the PubMLST authenticated API."""

    def __init__(self):
        """Load the stored credentials and a session token for the default database.

        Raises:
            PUBMLSTError: Re-raised after logging when the credentials or the
                session token cannot be loaded.
        """
        try:
            credentials = load_auth_credentials()
            (
                self.consumer_key,
                self.consumer_secret,
                self.access_token,
                self.access_secret,
            ) = credentials
            self.database = "pubmlst_test_seqdef"
            session_pair = load_session_credentials(self.database)
            self.session_token, self.session_secret = session_pair
        except PUBMLSTError as e:
            logger.error(f"Failed to initialize PubMLST client: {e}")
            raise

    @staticmethod
    def parse_pubmlst_url(url: str):
        """Delegate to the module-level ``parse_pubmlst_url`` helper."""
        return parse_pubmlst_url(url)

    def _make_request(self, request_type: RequestType, method: HTTPMethod, url: str, db: str = None, response_handler: ResponseHandler = ResponseHandler.JSON):
        """Sign and send one API request, then decode the response.

        Args:
            request_type: ``RequestType.AUTH`` signs with the access token,
                ``RequestType.DB`` signs with a session token.
            method: ``HTTPMethod.GET``, ``HTTPMethod.POST`` or ``HTTPMethod.PUT``.
            url: Full URL to call.
            db: When given, the session token for this database is loaded;
                otherwise the token obtained at construction time is used.
            response_handler: Chooses whether raw content, text or parsed JSON
                is returned.

        Raises:
            SessionTokenRequestError: The server answered with an HTTP error status.
            PUBMLSTError: Any other failure, including unsupported
                ``request_type``, ``method`` or ``response_handler`` values.
        """
        try:
            token, secret = (
                load_session_credentials(db)
                if db
                else (self.session_token, self.session_secret)
            )

            # Pick the token pair that signs this request.
            if request_type == RequestType.AUTH:
                signing_token, signing_secret = self.access_token, self.access_secret
            elif request_type == RequestType.DB:
                signing_token, signing_secret = token, secret
            else:
                raise ValueError(f"Unsupported request type: {request_type}")

            authorization = generate_oauth_header(
                url,
                self.consumer_key,
                self.consumer_secret,
                signing_token,
                signing_secret,
            )
            headers = {"Authorization": authorization}

            # Resolve the requests function matching the HTTP verb.
            if method == HTTPMethod.GET:
                send = requests.get
            elif method == HTTPMethod.POST:
                send = requests.post
            elif method == HTTPMethod.PUT:
                send = requests.put
            else:
                raise ValueError(f"Unsupported HTTP method: {method}")

            response = send(url, headers=headers)
            response.raise_for_status()

            if response_handler == ResponseHandler.CONTENT:
                return response.content
            if response_handler == ResponseHandler.TEXT:
                return response.text
            if response_handler == ResponseHandler.JSON:
                return response.json()
            raise ValueError(f"Unsupported response handler: {response_handler}")

        except requests.exceptions.HTTPError as e:
            raise SessionTokenRequestError(db or self.database, e.response.status_code, e.response.text) from e
        except requests.exceptions.RequestException as e:
            logger.error(f"Request failed: {e}")
            raise PUBMLSTError(f"Request failed: {e}") from e
        except Exception as e:
            logger.error(f"Unexpected error during request: {e}")
            raise PUBMLSTError(f"An unexpected error occurred: {e}") from e

    def query_databases(self):
        """Return the parsed JSON listing served at ``{BASE_API}/db``."""
        return self._make_request(
            RequestType.DB,
            HTTPMethod.GET,
            f"{BASE_API}/db",
            response_handler=ResponseHandler.JSON,
        )

    def download_locus(self, db: str, locus: str, **kwargs):
        """Return the ``alleles_fasta`` text for a locus.

        Extra keyword arguments are URL-encoded into the query string.
        """
        url = f"{BASE_API}/db/{db}/loci/{locus}/alleles_fasta"
        query_string = urlencode(kwargs)
        if query_string:
            url = f"{url}?{query_string}"
        return self._make_request(
            RequestType.DB,
            HTTPMethod.GET,
            url,
            db=db,
            response_handler=ResponseHandler.TEXT,
        )

    def download_profiles_csv(self, db: str, scheme_id: int):
        """Return the ``profiles_csv`` text for a scheme.

        Raises:
            ValueError: If ``scheme_id`` is falsy.
        """
        if not scheme_id:
            raise ValueError("Scheme ID is required to download profiles CSV.")
        return self._make_request(
            RequestType.DB,
            HTTPMethod.GET,
            f"{BASE_API}/db/{db}/schemes/{scheme_id}/profiles_csv",
            db=db,
            response_handler=ResponseHandler.TEXT,
        )

    def retrieve_scheme_info(self, db: str, scheme_id: int):
        """Return the parsed JSON description of a single scheme."""
        return self._make_request(
            RequestType.DB,
            HTTPMethod.GET,
            f"{BASE_API}/db/{db}/schemes/{scheme_id}",
            db=db,
            response_handler=ResponseHandler.JSON,
        )

    def list_schemes(self, db: str):
        """Return the parsed JSON list of schemes for a database."""
        return self._make_request(
            RequestType.DB,
            HTTPMethod.GET,
            f"{BASE_API}/db/{db}/schemes",
            db=db,
            response_handler=ResponseHandler.JSON,
        )
Loading

0 comments on commit ff3549e

Please sign in to comment.