Skip to content

Commit

Permalink
load edam data into MariaDB edam_etl table
Browse files Browse the repository at this point in the history
  • Loading branch information
mdsage1 committed Oct 21, 2024
1 parent fcb3799 commit 253a2ee
Showing 1 changed file with 65 additions and 0 deletions.
65 changes: 65 additions & 0 deletions apps/openchallenges/edam-etl/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import requests
from os import getenv
from typing import Optional
import mariadb
import sys
import mysql.connector
from sqlalchemy import create_engine

# Get config from the environment variables

Expand All @@ -11,6 +15,66 @@
print(f"EDAM Version: {VERSION}")
print(f"OC DB URL: {OC_DB_URL}")

# Read the MariaDB connection settings from the environment. Any variable
# that is not set is left as None.
USERNAME, PASSWORD, PORT, DB, HOST = (
    getenv(name) for name in ("USERNAME", "PASSWORD", "PORT", "DB", "HOST")
)


def connect_to_mariadb(
    username: str, password: str, port: str, host: str, database: str, df: pd.DataFrame
) -> None:
    """Recreate the ``edam_etl`` table in MariaDB and load ``df`` into it.

    The table is dropped and recreated through a MariaDB Connector/Python
    connection. The rows of ``df`` are then appended through a SQLAlchemy
    engine (``mysql+mysqlconnector`` driver) that targets the same host, port
    and database.

    Args:
        username: Database user name.
        password: Database password.
        port: TCP port of the server as a string; converted with ``int``.
        host: Host name or address of the server.
        database: Name of the database that will hold ``edam_etl``.
        df: Rows to load. NOTE(review): presumably its columns are ``id``,
            ``class_id`` and ``preferred_label`` to match the table created
            here -- confirm against the code that builds the DataFrame.

    Raises:
        SystemExit: With status 1 when the MariaDB connector reports an error.
    """
    # Imported locally so this function does not need a new file-level import.
    from urllib.parse import quote_plus

    port_number = int(port)
    conn = None
    engine = None
    try:
        print("Establishing a connection to the MariaDB Platform.")
        conn = mariadb.connect(
            user=username,
            password=password,
            host=host,
            port=port_number,
            database=database,
        )
        cur = conn.cursor()
        print("Connection has been established to MariaDB Platform!")

        # Start from a clean table on every run.
        cur.execute("DROP TABLE IF EXISTS edam_etl")
        cur.execute(
            """
            CREATE TABLE edam_etl (
                id INT PRIMARY KEY,
                class_id VARCHAR(255),
                preferred_label VARCHAR(255)
            )
            """
        )

        # Commit only after the table actually exists, so the message below
        # is truthful.
        conn.commit()
        print("The table edam_etl has been added to the edam database!")

        # Credentials are URL-escaped so characters such as "@", ":" or "/"
        # cannot corrupt the connection URL, and the port is included so the
        # engine reaches the same server as the connection above.
        engine = create_engine(
            "mysql+mysqlconnector://"
            f"{quote_plus(username)}:{quote_plus(password)}"
            f"@{host}:{port_number}/{database}"
        )

        # Load the concepts
        df.to_sql(name="edam_etl", con=engine, if_exists="append", index=False)

        print("The table edam_etl has been populated with the EDAM concepts!")
    except mariadb.Error as e:
        print(f"Error connecting to MariaDB Platform: {e}")
        sys.exit(1)
    finally:
        # Release database resources on both the success and the error path.
        if engine is not None:
            engine.dispose()
        if conn is not None:
            conn.close()


def download_edam_csv(url: str, version: str) -> Optional[bool]:
"""Download EDAM concepts from GitHub or S3 bucket (CSV file)"""
Expand Down Expand Up @@ -101,6 +165,7 @@ def main() -> None:
if download_edam_csv(url, VERSION):
df: pd.DataFrame = transform_to_dataframe(VERSION)
print_info_statistics(df)
connect_to_mariadb(USERNAME, PASSWORD, PORT, HOST, DB, df)


if __name__ == "__main__":
Expand Down

0 comments on commit 253a2ee

Please sign in to comment.