Skip to content

Commit

Permalink
Merge pull request #16 from kiwix/update-mirrors
Browse files Browse the repository at this point in the history
update mirrors from url
  • Loading branch information
elfkuzco authored Jun 11, 2024
2 parents e7b68f2 + 9b922b7 commit 661c4bb
Show file tree
Hide file tree
Showing 20 changed files with 520 additions and 46 deletions.
2 changes: 1 addition & 1 deletion backend/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.11-slim
FROM python:3.11-slim-bookworm
LABEL org.opencontainers.image.source=https://github.com/kiwix/mirrors-qa
# Copy code
COPY src /src/src
Expand Down
8 changes: 7 additions & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ dependencies = [
"pydantic == 2.7.2",
"SQLAlchemy == 2.0.30",
"psycopg[binary,pool] == 3.1.19",
"beautifulsoup4 == 4.12.3",
"requests == 2.32.3",
"pycountry == 24.6.1",
]
license = {text = "GPL-3.0-or-later"}
classifiers = [
Expand All @@ -30,6 +33,9 @@ dynamic = ["version"]
[project.urls]
Homepage = "https://github.com/kiwix/mirrors-qa"

[project.scripts]
update-mirrors = "mirrors_qa_backend.entrypoint:main"

[project.optional-dependencies]
scripts = [
"invoke==2.2.0",
Expand Down Expand Up @@ -92,7 +98,7 @@ fix-ruff = "inv fix-ruff --args '{args}'"
fixall = "inv fixall --args '{args}'"

[tool.hatch.envs.check]
features = ["scripts", "check"]
features = ["scripts", "test", "check"]

[tool.hatch.envs.check.scripts]
pyright = "inv check-pyright --args '{args}'"
Expand Down
5 changes: 3 additions & 2 deletions backend/src/mirrors_qa_backend/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import logging
import os

from mirrors_qa_backend.settings import Settings

logger = logging.getLogger("backend")

if not logger.hasHandlers():
logger.setLevel(logging.DEBUG if bool(os.getenv("DEBUG")) else logging.INFO)
logger.setLevel(logging.DEBUG if Settings.debug else logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("[%(asctime)s: %(levelname)s] %(message)s"))
logger.addHandler(handler)
Empty file.
24 changes: 19 additions & 5 deletions backend/src/mirrors_qa_backend/db/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from sqlalchemy.orm import sessionmaker

from mirrors_qa_backend import logger
from mirrors_qa_backend.db.models import Mirror
from mirrors_qa_backend.db import mirrors, models
from mirrors_qa_backend.extract import get_current_mirrors
from mirrors_qa_backend.settings import Settings

Session = sessionmaker(
Expand Down Expand Up @@ -38,9 +39,22 @@ def count_from_stmt(session: OrmSession, stmt: SelectBase) -> int:

def initialize_mirrors() -> None:
with Session.begin() as session:
count = count_from_stmt(session, select(Mirror))
if count == 0:
current_mirrors = get_current_mirrors()
nb_mirrors = count_from_stmt(session, select(models.Mirror))
if nb_mirrors == 0:
logger.info("No mirrors exist in database.")
# TODO: update mirrors from https://download.kiwix.org/mirrors.html
if not current_mirrors:
logger.info(f"No mirrors were found on {Settings.mirrors_url!r}")
return
result = mirrors.create_or_update_status(session, current_mirrors)
logger.info(
f"Registered {result.nb_mirrors_added} mirrors "
f"from {Settings.mirrors_url!r}"
)
else:
logger.info(f"Found {count} mirrors in database.")
logger.info(f"Found {nb_mirrors} mirrors in database.")
result = mirrors.create_or_update_status(session, current_mirrors)
logger.info(
f"Added {result.nb_mirrors_added} mirrors. "
f"Disabled {result.nb_mirrors_disabled} mirrors."
)
113 changes: 113 additions & 0 deletions backend/src/mirrors_qa_backend/db/mirrors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from dataclasses import dataclass

from sqlalchemy import select
from sqlalchemy.orm import Session as OrmSession
from sqlalchemy.orm import selectinload

from mirrors_qa_backend import logger, schemas
from mirrors_qa_backend.db import models
from mirrors_qa_backend.exceptions import EmptyMirrorsError


@dataclass
class MirrorsUpdateResult:
    """Counters summarising one update of the mirrors stored in the database."""

    # Mirrors newly created, plus previously disabled mirrors that were
    # switched back on by create_or_update_status.
    nb_mirrors_added: int = 0
    # Mirrors marked as disabled because they are no longer listed.
    nb_mirrors_disabled: int = 0


def create_mirrors(session: OrmSession, mirrors: list[schemas.Mirror]) -> int:
    """Add each mirror in `mirrors` to the session and return how many were added.

    Assumes that each mirror does not exist on the database. The country of
    every mirror is looked up by its code and is created when it is missing.
    Nothing is committed here; the caller owns the session's transaction.
    """
    nb_created = 0
    for mirror in mirrors:
        db_mirror = models.Mirror(
            id=mirror.id,
            base_url=mirror.base_url,
            enabled=mirror.enabled,
            region=mirror.region,
            asn=mirror.asn,
            score=mirror.score,
            latitude=mirror.latitude,
            longitude=mirror.longitude,
            country_only=mirror.country_only,
            # Previously copied from mirror.country_only, which silently stored
            # the wrong flag. NOTE(review): assumes schemas.Mirror exposes
            # `region_only` like the database model does -- confirm.
            region_only=mirror.region_only,
            as_only=mirror.as_only,
            other_countries=mirror.other_countries,
        )
        # Ensure the country exists for the mirror
        country = session.scalars(
            select(models.Country).where(models.Country.code == mirror.country.code)
        ).one_or_none()

        if country is None:
            country = models.Country(code=mirror.country.code, name=mirror.country.name)
            session.add(country)

        db_mirror.country = country
        session.add(db_mirror)
        logger.debug(
            f"Registered new mirror: {db_mirror.id!r} for country: {country.name!r}"
        )
        nb_created += 1
    return nb_created


def create_or_update_status(
    session: OrmSession, mirrors: list[schemas.Mirror]
) -> MirrorsUpdateResult:
    """Bring the mirrors stored in the database in line with `mirrors`.

    Mirrors that are listed but unknown to the database are created, mirrors
    that are stored but no longer listed are disabled, and stored mirrors that
    are listed again while disabled are re-enabled (counted as added).

    Raises:
        EmptyMirrorsError if the provided list of mirrors is empty.
    """
    if not mirrors:
        raise EmptyMirrorsError("mirrors list must not be empty")

    outcome = MirrorsUpdateResult()

    # Index the incoming mirrors by id (hostname) so membership checks against
    # the stored mirrors are cheap.
    incoming: dict[str, schemas.Mirror] = {}
    for entry in mirrors:
        incoming[entry.id] = entry

    # Index the stored mirrors by id as well; the country is loaded eagerly
    # because it is used in the log messages below.
    stmt = select(models.Mirror).options(selectinload(models.Mirror.country))
    stored: dict[str, models.Mirror] = {}
    for row in session.scalars(stmt).all():
        stored[row.id] = row

    # Anything listed but not stored is a new mirror.
    for key, entry in incoming.items():
        if key in stored:
            continue
        outcome.nb_mirrors_added += create_mirrors(session, [entry])

    # Reconcile the enabled flag of every stored mirror with the listing.
    for key, row in stored.items():
        listed = key in incoming
        if listed and row.enabled:
            # Listed and already enabled: nothing to change.
            continue

        if listed:
            # Listed again after having been disabled: switch it back on.
            logger.debug(
                f"Re-enabling mirror: {row.id!r} for "
                f"country: {row.country.name!r}"
            )
            row.enabled = True
            session.add(row)
            outcome.nb_mirrors_added += 1
        else:
            # No longer listed: mark it as disabled.
            logger.debug(
                f"Disabling mirror: {row.id!r} for "
                f"country: {row.country.name!r}"
            )
            row.enabled = False
            session.add(row)
            outcome.nb_mirrors_disabled += 1

    return outcome
61 changes: 34 additions & 27 deletions backend/src/mirrors_qa_backend/db/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from datetime import datetime
import datetime
from ipaddress import IPv4Address
from uuid import UUID

Expand All @@ -27,7 +27,7 @@ class Base(MappedAsDataclass, DeclarativeBase):
list[str]: ARRAY(
item_type=String
), # transform Python list[str] into PostgreSQL Array of strings
datetime: DateTime(
datetime.datetime: DateTime(
timezone=False
), # transform Python datetime into PostgreSQL Datetime without timezone
IPv4Address: INET, # transform Python IPV4Address into PostgreSQL INET
Expand Down Expand Up @@ -74,15 +74,15 @@ class Mirror(Base):
base_url: Mapped[str]
enabled: Mapped[bool]
# metadata of a mirror from MirrorBrain (https://mirrorbrain-docs.readthedocs.io/en/latest/mirrors.html#displaying-details-about-a-mirror)
region: Mapped[str | None]
asn: Mapped[str | None]
score: Mapped[int | None]
latitude: Mapped[float | None]
longitude: Mapped[float | None]
country_only: Mapped[bool | None]
region_only: Mapped[bool | None]
as_only: Mapped[bool | None]
other_countries: Mapped[list[str] | None]
region: Mapped[str | None] = mapped_column(default=None)
asn: Mapped[str | None] = mapped_column(default=None)
score: Mapped[int | None] = mapped_column(default=None)
latitude: Mapped[float | None] = mapped_column(default=None)
longitude: Mapped[float | None] = mapped_column(default=None)
country_only: Mapped[bool | None] = mapped_column(default=None)
region_only: Mapped[bool | None] = mapped_column(default=None)
as_only: Mapped[bool | None] = mapped_column(default=None)
other_countries: Mapped[list[str] | None] = mapped_column(default=None)

country_code: Mapped[str] = mapped_column(
ForeignKey("country.code"),
Expand All @@ -97,9 +97,11 @@ class Worker(Base):
# RSA public key in PKCS8 format for generating access tokens required
# to make requests to the web server
pubkey_pkcs8: Mapped[str]
pubkey_fingerprint: Mapped[str | None]
pubkey_fingerprint: Mapped[str]

last_seen_on: Mapped[datetime | None]
last_seen_on: Mapped[datetime.datetime] = mapped_column(
default_factory=datetime.datetime.now
)
countries: Mapped[list[Country]] = relationship(back_populates="worker", init=False)


Expand All @@ -108,23 +110,28 @@ class Test(Base):
id: Mapped[UUID] = mapped_column(
init=False, primary_key=True, server_default=text("uuid_generate_v4()")
)
requested_on: Mapped[datetime]
started_on: Mapped[datetime | None]
status: Mapped[StatusEnum | None] = mapped_column(
requested_on: Mapped[datetime.datetime] = mapped_column(
default_factory=datetime.datetime.now
)
started_on: Mapped[datetime.datetime | None] = mapped_column(default=None)
status: Mapped[StatusEnum] = mapped_column(
Enum(
StatusEnum,
native_enum=False,
validate_strings=True,
create_constraint=True,
name="status",
)
),
default=StatusEnum.PENDING,
)
error: Mapped[str | None]
isp: Mapped[str | None]
ip_address: Mapped[IPv4Address | None]
asn: Mapped[str | None] # autonomous system based on IP
country: Mapped[str | None] # country based on IP
location: Mapped[str | None] # city based on IP
latency: Mapped[int | None] # milliseconds
download_size: Mapped[int | None] # bytes
duration: Mapped[int | None] # seconds
speed: Mapped[float | None] # bytes per second
error: Mapped[str | None] = mapped_column(default=None)
isp: Mapped[str | None] = mapped_column(default=None)
ip_address: Mapped[IPv4Address | None] = mapped_column(default=None)
# autonomous system based on IP
asn: Mapped[str | None] = mapped_column(default=None)
country: Mapped[str | None] = mapped_column(default=None) # country based on IP
location: Mapped[str | None] = mapped_column(default=None) # city based on IP
latency: Mapped[int | None] = mapped_column(default=None) # milliseconds
download_size: Mapped[int | None] = mapped_column(default=None) # bytes
duration: Mapped[int | None] = mapped_column(default=None) # seconds
speed: Mapped[float | None] = mapped_column(default=None) # bytes per second
25 changes: 25 additions & 0 deletions backend/src/mirrors_qa_backend/entrypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import argparse
import logging

from mirrors_qa_backend import db, logger
from mirrors_qa_backend.db import mirrors
from mirrors_qa_backend.extract import get_current_mirrors


def main():
    """Command-line entry point: refresh the mirrors stored in the database.

    Accepts an optional --verbose/-v flag that raises the package logger to
    DEBUG, then fetches the current mirrors and applies them inside a single
    database transaction.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--verbose", "-v", action="store_true", help="Show verbose output"
    )
    options = cli.parse_args()

    if options.verbose:
        logger.setLevel(logging.DEBUG)

    with db.Session.begin() as session:
        fetched = get_current_mirrors()
        mirrors.create_or_update_status(session, fetched)


if __name__ == "__main__":
    main()
7 changes: 4 additions & 3 deletions backend/src/mirrors_qa_backend/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@


class StatusEnum(Enum):
MISSED = 0
SUCCEEDED = 1
ERRORED = 2
PENDING = 0
MISSED = 1
SUCCEEDED = 2
ERRORED = 3
13 changes: 13 additions & 0 deletions backend/src/mirrors_qa_backend/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from requests import RequestException


class EmptyMirrorsError(Exception):
    """Raised when the mirrors in the database are updated from an empty list."""


class MirrorsExtractError(Exception):
    """Raised when mirror data cannot be extracted from the page DOM."""


class MirrorsRequestError(RequestException):
    """Raised when a network error occurs while fetching the mirrors URL."""
Loading

0 comments on commit 661c4bb

Please sign in to comment.