Merge pull request #195 from Clinical-Genomics/rc410

### Added - Release candidate microSALT 4.1.0
Clinical-Genomics · Jan 8, 2025 · ff3549e · ff3549e
2 parents ac85314 + 124b4bf
commit ff3549e
Show file tree

Hide file tree

Showing 16 changed files with 1,037 additions and 772 deletions.
diff --git a/configExample.json b/configExample.json
@@ -8,24 +8,21 @@
     "project": "production",
     "type": "core"
   },
-
-  "regex":  {
+  "regex": {
     "mail_recipient": "[email protected]",
     "_comment": "File finding patterns. Only single capture group accepted (for reverse/forward identifier)",
     "file_pattern": "\\w{8,12}_\\w{8,10}(?:-\\d+)*_L\\d_(?:R)*(\\d{1}).fastq.gz",
     "_comment": "Organisms recognized enough to be considered stable",
     "verified_organisms": []
   },
-
   "_comment": "Folders",
-  "folders":  {
+  "folders": {
     "_comment": "Root folder for ALL output",
     "results": "/tmp/MLST/results/",
     "_comment": "Report collection folder",
     "reports": "/tmp/MLST/reports/",
     "_comment": "Log file position and name",
     "log_file": "/tmp/microsalt.log",
-
     "_comment": "Root folder for input fasta sequencing data",
     "seqdata": "/tmp/projects/",
     "_comment": "ST profiles. Each ST profile file under 'profiles' have an identicial folder under references",
@@ -35,18 +32,18 @@
     "_comment": "Resistances. Commonly from resFinder",
     "resistances": "/tmp/MLST/references/resistances",
     "_comment": "Download path for NCBI genomes, for alignment usage",
-    "genomes": "/tmp/MLST/references/genomes"
+    "genomes": "/tmp/MLST/references/genomes",
+    "_comment": "PubMLST credentials",
+    "pubmlst_credentials": "/tmp/MLST/credentials"
   },
-
   "_comment": "Database/Flask configuration",
   "database": {
     "SQLALCHEMY_DATABASE_URI": "sqlite:////tmp/microsalt.db",
     "SQLALCHEMY_TRACK_MODIFICATIONS": "False",
     "DEBUG": "True"
   },
-
   "_comment": "Thresholds for Displayed results",
-  "threshold":  {
+  "threshold": {
     "_comment": "Typing thresholds",
     "mlst_id": 100,
     "mlst_novel_id": 99.5,
@@ -72,11 +69,15 @@
     "bp_50x_warn": 50,
     "bp_100x_warn": 20
   },
-
   "_comment": "Genologics temporary configuration file",
   "genologics": {
     "baseuri": "https://lims.facility.se/",
     "username": "limsuser",
     "password": "mypassword"
+  },
+  "_comment": "PubMLST credentials",
+  "pubmlst": {
+    "client_id": "",
+    "client_secret": ""
   }
-}
+}
diff --git a/microSALT/__init__.py b/microSALT/__init__.py
@@ -10,7 +10,7 @@
 from flask import Flask
 from distutils.sysconfig import get_python_lib
 
-__version__ = "4.0.0"
+__version__ = "4.1.0"
 
 app = Flask(__name__, template_folder="server/templates")
 app.config.setdefault("SQLALCHEMY_DATABASE_URI", "sqlite:///:memory:")
@@ -47,11 +47,22 @@
         # Load flask info
         app.config.update(preset_config["database"])
 
+        # Add `folders` configuration
+        app.config["folders"] = preset_config.get("folders", {})
+
+        # Ensure PubMLST configuration is included
+
+        app.config["pubmlst"] = preset_config.get("pubmlst", {
+            "client_id": "",
+            "client_secret": ""        
+            })
+
+        app.config["pubmlst"] = preset_config.get("pubmlst", {"client_id": "", "client_secret": ""})
+
+
         # Add extrapaths to config
         preset_config["folders"]["expec"] = os.path.abspath(
-            os.path.join(
-                pathlib.Path(__file__).parent.parent, "unique_references/ExPEC.fsa"
-            )
+            os.path.join(pathlib.Path(__file__).parent.parent, "unique_references/ExPEC.fsa")
         )
         # Check if release install exists
         for entry in os.listdir(get_python_lib()):
@@ -104,22 +115,14 @@
                         ):
                             # Special string, mangling
                             if thing == "log_file":
-                                unmade_fldr = os.path.dirname(
-                                    preset_config[entry][thing]
-                                )
-                                bash_cmd = "touch {}".format(
-                                    preset_config[entry][thing]
-                                )
-                                proc = subprocess.Popen(
-                                    bash_cmd.split(), stdout=subprocess.PIPE
-                                )
+                                unmade_fldr = os.path.dirname(preset_config[entry][thing])
+                                bash_cmd = "touch {}".format(preset_config[entry][thing])
+                                proc = subprocess.Popen(bash_cmd.split(), stdout=subprocess.PIPE)
                                 output, error = proc.communicate()
                             elif thing == "SQLALCHEMY_DATABASE_URI":
                                 unmade_fldr = os.path.dirname(db_file)
                                 bash_cmd = "touch {}".format(db_file)
-                                proc = subprocess.Popen(
-                                    bash_cmd.split(), stdout=subprocess.PIPE
-                                )
+                                proc = subprocess.Popen(bash_cmd.split(), stdout=subprocess.PIPE)
                                 output, error = proc.communicate()
                                 if proc.returncode != 0:
                                     logger.error(
@@ -132,12 +135,8 @@
                                 os.makedirs(unmade_fldr)
                                 logger.info("Created path {}".format(unmade_fldr))
 
-        fh = logging.FileHandler(
-            os.path.expanduser(preset_config["folders"]["log_file"])
-        )
-        fh.setFormatter(
-            logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
-        )
+        fh = logging.FileHandler(os.path.expanduser(preset_config["folders"]["log_file"]))
+        fh.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
         logger.addHandler(fh)
 
         # Integrity check database

diff --git a/microSALT/utils/pubmlst/__init__.py b/microSALT/utils/pubmlst/__init__.py
diff --git a/microSALT/utils/pubmlst/authentication.py b/microSALT/utils/pubmlst/authentication.py
@@ -0,0 +1,106 @@
+import json
+import os
+from datetime import datetime, timedelta
+from dateutil import parser
+from rauth import OAuth1Session
+from microSALT import logger
+from microSALT.utils.pubmlst.helpers import BASE_API, save_session_token, load_auth_credentials, get_path, folders_config, credentials_path_key, pubmlst_session_credentials_file_name 
+from microSALT.utils.pubmlst.exceptions import (
+    PUBMLSTError,
+    SessionTokenRequestError,
+    SessionTokenResponseError,
+)
+
+# Lifetime, in hours, assumed for a newly issued session token when its
+# expiration time is computed and stored.
+session_token_validity = 12  # 12-hour validity
+# Safety margin, in seconds, subtracted from the stored expiration before a
+# cached session token is reused.
+session_expiration_buffer = 60  # 60-second buffer
+
+def get_new_session_token(db: str):
+    """Request a new session token for a database and persist it.
+
+    Signs a GET request to the database's ``oauth/get_session_token``
+    endpoint under ``BASE_API`` with the credentials returned by
+    ``load_auth_credentials`` and, on success, stores the token through
+    ``save_session_token`` with an expiration ``session_token_validity``
+    hours from now.
+
+    Args:
+        db: Name of the database the session token is requested for.
+
+    Returns:
+        The ``(session_token, session_secret)`` pair taken from the response.
+
+    Raises:
+        SessionTokenRequestError: The response status is not OK.
+        SessionTokenResponseError: The response body is not valid JSON or
+            lacks ``oauth_token`` / ``oauth_token_secret``.
+        PUBMLSTError: Any other failure, chained to the original exception.
+            NOTE(review): the two errors above are assumed to derive from
+            PUBMLSTError so they propagate unchanged; if they do not, they
+            are wrapped in a PUBMLSTError as well. Confirm in exceptions.py.
+    """
+    logger.debug(f"Fetching a new session token for database '{db}'...")
+
+    try:
+        consumer_key, consumer_secret, access_token, access_secret = load_auth_credentials()
+
+        url = f"{BASE_API}/db/{db}/oauth/get_session_token"
+
+        session = OAuth1Session(
+            consumer_key=consumer_key,
+            consumer_secret=consumer_secret,
+            access_token=access_token,
+            access_token_secret=access_secret,
+        )
+
+        response = session.get(url, headers={"User-Agent": "BIGSdb downloader"})
+        logger.debug(f"Response Status Code: {response.status_code}")
+
+        if not response.ok:
+            raise SessionTokenRequestError(db, response.status_code, response.text)
+
+        # Guard only the JSON decoding, so that failures in later steps
+        # (e.g. saving the token) are not misreported as a malformed response.
+        try:
+            token_data = response.json()
+        except ValueError as e:
+            raise SessionTokenResponseError(db, f"Invalid response format: {str(e)}") from e
+
+        session_token = token_data.get("oauth_token")
+        session_secret = token_data.get("oauth_token_secret")
+
+        if not session_token or not session_secret:
+            raise SessionTokenResponseError(
+                db, "Missing 'oauth_token' or 'oauth_token_secret' in response."
+            )
+
+        expiration_time = datetime.now() + timedelta(hours=session_token_validity)
+
+        save_session_token(db, session_token, session_secret, expiration_time)
+        return session_token, session_secret
+
+    except PUBMLSTError as e:
+        logger.error(f"Error during token fetching: {e}")
+        raise
+    except Exception as e:
+        logger.error(f"Unexpected error: {e}")
+        raise PUBMLSTError(
+            f"Unexpected error while fetching session token for database '{db}': {e}"
+        ) from e
+
+def load_session_credentials(db: str):
+    """Return a usable session token pair for a database.
+
+    Reads the session credentials file located under the configured
+    credentials folder. A cached token is reused only while the current time
+    is more than ``session_expiration_buffer`` seconds before its stored
+    expiration; in every other case a new token is requested through
+    ``get_new_session_token``.
+
+    Args:
+        db: Name of the database whose session token is wanted.
+
+    Returns:
+        The ``(session_token, session_secret)`` pair, either cached or newly
+        fetched.
+
+    Raises:
+        SessionTokenResponseError: The session file is not valid JSON.
+        PUBMLSTError: Any other failure, chained to the original exception.
+            NOTE(review): SessionTokenResponseError is assumed to derive from
+            PUBMLSTError so it propagates unchanged; confirm in exceptions.py.
+    """
+    try:
+        credentials_file = os.path.join(
+            get_path(folders_config, credentials_path_key),
+            pubmlst_session_credentials_file_name,
+        )
+
+        if not os.path.exists(credentials_file):
+            logger.debug("Session file does not exist. Fetching a new session token.")
+            return get_new_session_token(db)
+
+        with open(credentials_file, "r") as f:
+            try:
+                all_sessions = json.load(f)
+            except json.JSONDecodeError as e:
+                raise SessionTokenResponseError(
+                    db, f"Failed to parse session file: {str(e)}"
+                ) from e
+
+        db_session_data = all_sessions.get("databases", {}).get(db)
+        if not db_session_data:
+            logger.debug(f"No session token found for database '{db}'. Fetching a new session token.")
+            return get_new_session_token(db)
+
+        session_token = db_session_data.get("token")
+        session_secret = db_session_data.get("secret")
+        try:
+            expiration = parser.parse(db_session_data.get("expiration", ""))
+        except (ValueError, OverflowError, TypeError):
+            # A missing or unreadable timestamp cannot prove the token is
+            # still valid, so the entry is treated as stale below.
+            expiration = None
+
+        # An incomplete entry would otherwise hand (None, None) to the caller
+        # or fail on every call until the file is removed by hand; fetching a
+        # new token replaces the broken entry instead.
+        if not session_token or not session_secret or expiration is None:
+            logger.warning(
+                f"Stored session for database '{db}' is incomplete or unreadable. "
+                "Fetching a new session token."
+            )
+            return get_new_session_token(db)
+
+        if datetime.now() < expiration - timedelta(seconds=session_expiration_buffer):
+            logger.debug(f"Using existing session token for database '{db}'.")
+            return session_token, session_secret
+
+        logger.debug(f"Session token for database '{db}' has expired. Fetching a new session token.")
+        return get_new_session_token(db)
+
+    except PUBMLSTError as e:
+        logger.error(f"PUBMLST-specific error occurred: {e}")
+        raise
+    except Exception as e:
+        logger.error(f"Unexpected error: {e}")
+        raise PUBMLSTError(
+            f"Unexpected error while loading session token for database '{db}': {e}"
+        ) from e
+
diff --git a/microSALT/utils/pubmlst/client.py b/microSALT/utils/pubmlst/client.py
@@ -0,0 +1,116 @@
+import requests
+from urllib.parse import urlencode
+from microSALT.utils.pubmlst.helpers import (
+    BASE_API,
+    generate_oauth_header,
+    load_auth_credentials,
+    parse_pubmlst_url
+)
+from microSALT.utils.pubmlst.constants import RequestType, HTTPMethod, ResponseHandler
+from microSALT.utils.pubmlst.exceptions import PUBMLSTError, SessionTokenRequestError
+from microSALT.utils.pubmlst.authentication import load_session_credentials
+from microSALT import logger
+
+class PubMLSTClient:
+    """Wrapper around the authenticated PubMLST REST API."""
+
+    def __init__(self):
+        """Load the stored OAuth credentials and a session token for the default database."""
+        try:
+            credentials = load_auth_credentials()
+            (
+                self.consumer_key,
+                self.consumer_secret,
+                self.access_token,
+                self.access_secret,
+            ) = credentials
+            self.database = "pubmlst_test_seqdef"
+            session_pair = load_session_credentials(self.database)
+            self.session_token, self.session_secret = session_pair
+        except PUBMLSTError as e:
+            logger.error(f"Failed to initialize PubMLST client: {e}")
+            raise
+
+    @staticmethod
+    def parse_pubmlst_url(url: str):
+        """Delegate to the module-level ``parse_pubmlst_url`` helper."""
+        return parse_pubmlst_url(url)
+
+    def _make_request(
+        self,
+        request_type: RequestType,
+        method: HTTPMethod,
+        url: str,
+        db: str = None,
+        response_handler: ResponseHandler = ResponseHandler.JSON,
+    ):
+        """Sign and send one API request, returning the decoded response.
+
+        The session credentials come from ``load_session_credentials(db)``
+        when ``db`` is given, otherwise from the pair loaded at construction.
+        ``request_type`` selects whether the header is signed with the access
+        token (AUTH) or the session token (DB). ``response_handler`` selects
+        raw content, text or parsed JSON as the return value.
+
+        Raises:
+            SessionTokenRequestError: The server returned an HTTP error status.
+            PUBMLSTError: Any other failure, including unsupported arguments.
+        """
+        try:
+            # Resolve the session pair first, exactly as before, so a failing
+            # credential lookup is reported before any argument validation.
+            session_token, session_secret = (
+                load_session_credentials(db)
+                if db
+                else (self.session_token, self.session_secret)
+            )
+
+            if request_type == RequestType.AUTH:
+                signing_token, signing_secret = self.access_token, self.access_secret
+            elif request_type == RequestType.DB:
+                signing_token, signing_secret = session_token, session_secret
+            else:
+                raise ValueError(f"Unsupported request type: {request_type}")
+
+            authorization = generate_oauth_header(
+                url, self.consumer_key, self.consumer_secret, signing_token, signing_secret
+            )
+            headers = {"Authorization": authorization}
+
+            if method == HTTPMethod.GET:
+                send = requests.get
+            elif method == HTTPMethod.POST:
+                send = requests.post
+            elif method == HTTPMethod.PUT:
+                send = requests.put
+            else:
+                raise ValueError(f"Unsupported HTTP method: {method}")
+
+            response = send(url, headers=headers)
+            response.raise_for_status()
+
+            if response_handler == ResponseHandler.CONTENT:
+                return response.content
+            if response_handler == ResponseHandler.TEXT:
+                return response.text
+            if response_handler == ResponseHandler.JSON:
+                return response.json()
+            raise ValueError(f"Unsupported response handler: {response_handler}")
+
+        except requests.exceptions.HTTPError as e:
+            raise SessionTokenRequestError(db or self.database, e.response.status_code, e.response.text) from e
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Request failed: {e}")
+            raise PUBMLSTError(f"Request failed: {e}") from e
+        except Exception as e:
+            logger.error(f"Unexpected error during request: {e}")
+            raise PUBMLSTError(f"An unexpected error occurred: {e}") from e
+
+    def query_databases(self):
+        """Return the parsed JSON listing from the ``/db`` endpoint."""
+        return self._make_request(
+            RequestType.DB,
+            HTTPMethod.GET,
+            f"{BASE_API}/db",
+            response_handler=ResponseHandler.JSON,
+        )
+
+    def download_locus(self, db: str, locus: str, **kwargs):
+        """Fetch the allele FASTA for ``locus`` in ``db`` as text.
+
+        Extra keyword arguments are URL-encoded and appended as the query
+        string.
+        """
+        url = f"{BASE_API}/db/{db}/loci/{locus}/alleles_fasta"
+        encoded_params = urlencode(kwargs)
+        if encoded_params:
+            url = f"{url}?{encoded_params}"
+        return self._make_request(
+            RequestType.DB, HTTPMethod.GET, url, db=db, response_handler=ResponseHandler.TEXT
+        )
+
+    def download_profiles_csv(self, db: str, scheme_id: int):
+        """Fetch the profiles CSV of scheme ``scheme_id`` in ``db`` as text.
+
+        Raises:
+            ValueError: ``scheme_id`` is falsy.
+        """
+        if not scheme_id:
+            raise ValueError("Scheme ID is required to download profiles CSV.")
+        endpoint = f"{BASE_API}/db/{db}/schemes/{scheme_id}/profiles_csv"
+        return self._make_request(
+            RequestType.DB, HTTPMethod.GET, endpoint, db=db, response_handler=ResponseHandler.TEXT
+        )
+
+    def retrieve_scheme_info(self, db: str, scheme_id: int):
+        """Return the parsed JSON description of scheme ``scheme_id`` in ``db``."""
+        endpoint = f"{BASE_API}/db/{db}/schemes/{scheme_id}"
+        return self._make_request(
+            RequestType.DB, HTTPMethod.GET, endpoint, db=db, response_handler=ResponseHandler.JSON
+        )
+
+    def list_schemes(self, db: str):
+        """Return the parsed JSON list of schemes available in ``db``."""
+        endpoint = f"{BASE_API}/db/{db}/schemes"
+        return self._make_request(
+            RequestType.DB, HTTPMethod.GET, endpoint, db=db, response_handler=ResponseHandler.JSON
+        )