- - env_file (git2s3.config.EnvConfig.Config attribute)
+
- env_loader() (in module git2s3.squire)
- env_prefix (git2s3.config.EnvConfig.Config attribute)
@@ -116,10 +127,20 @@ E
F
+H
+
+
L
@@ -163,28 +203,48 @@ L
M
-P
+N
+
+P
+
+
+R
+
@@ -193,12 +253,20 @@ S
W
diff --git a/docs/searchindex.js b/docs/searchindex.js
index d512abd..e2ec699 100644
--- a/docs/searchindex.js
+++ b/docs/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["README", "index"], "filenames": ["README.md", "index.rst"], "titles": ["Git2S3", "Welcome to Git2S3\u2019s documentation!"], "terms": {"backup": [0, 1], "github": [0, 1], "project": 0, "aw": [0, 1], "s3": [0, 1], "platform": 0, "support": 0, "deploy": 0, "recommend": 0, "instal": 0, "python": 0, "3": 0, "10": 0, "11": 0, "us": [0, 1], "dedic": 0, "virtual": 0, "environ": [0, 1], "m": 0, "pip": 0, "initi": 0, "id": [0, 1], "import": 0, "__name__": 0, "__main__": 0, "git": 0, "start": [0, 1], "cli": [0, 1], "help": 0, "usag": 0, "instruct": 0, "docstr": 0, "format": 0, "googl": 0, "style": 0, "convent": 0, "pep": 0, "8": 0, "isort": 0, "requir": 0, "gitvers": 0, "revers": 0, "f": 0, "release_not": 0, "rst": 0, "t": 0, "pre": 0, "commit": 0, "ensur": 0, "run": 0, "pytest": 0, "gener": [0, 1], "valid": [0, 1], "hyperlink": 0, "all": [0, 1], "markdown": 0, "file": [0, 1], "includ": [0, 1], "wiki": [0, 1], "page": [0, 1], "sphinx": 0, "5": 0, "1": 0, "recommonmark": 0, "http": [0, 1], "org": [0, 1], "thevickypedia": 0, "io": 0, "vignesh": 0, "rao": 0, "under": 0, "mit": 0, "kick": 1, "off": 1, "code": 1, "standard": 1, "releas": 1, "note": 1, "lint": 1, "pypi": 1, "packag": 1, "runbook": 1, "licens": 1, "copyright": 1, "class": 1, "env_fil": 1, "str": 1, "o": 1, "pathlik": 1, "env": 1, "logger": 1, "none": 1, "max_per_pag": 1, "int": 1, "100": 1, "instanti": 1, "object": 1, "clone": 1, "repo": 1, "gist": 1, "from": 1, "upload": 1, "load": 1, "necessari": 1, "arg": 1, "creat": 1, "connect": 1, "api": 1, "perform": 1, "keyword": 1, "argument": 1, "bring": 1, "your": 1, "own": 1, "maximum": 1, "number": 1, "fetch": 1, "per": 1, "get_al": 1, "field": 1, "dict": 1, "iter": 1, "through": 1, "target": 1, "owner": 1, "organ": 1, "get": 1, "avail": 1, "repositori": 1, "paramet": 1, "type": 1, "yield": 1, "dictionari": 1, "each": 1, "inform": 1, "clone_wiki": 1, "model": 1, "store": 1, "worker": 1, "json": 1, "payload": 1, "rais": 1, "except": 1, "If": 1, "thread": 1, "fail": 1, "cloner": 1, "func": 1, "callabl": 1, "concurr": 1, "function": 1, "refer": 1, "com": 1, "commun": 1, "discuss": 1, "44515": 1, "process": 1, "config": 1, "envconfig": 1, "baseset": 1, "var": 1, "pydant": 1, "git_api_url": 1, "endpoint": 1, "within": 1, "an": 1, "git_own": 1, "name": 1, "gist_own": 1, "git_token": 1, "token": 1, "privat": 1, "option": 1, "restor": 1, "default": 1, "clone_dir": 1, "locat": 1, "local": 1, "debug": 1, "boolean": 1, "flag": 1, "enabl": 1, "level": 1, "log": 1, "doe": 1, "appli": 1, "when": 1, "custom": 1, "i": 1, "stdout": 1, "aws_profile_nam": 1, "profil": 1, "valu": 1, "aws_default_profil": 1, "aws_access_key_id": 1, "access": 1, "kei": 1, "aws_secret_access_kei": 1, "secret": 1, "aws_region_nam": 1, "bucket": 1, "region": 1, "aws_default_region": 1, "aws_bucket_nam": 1, "new": 1, "pars": 1, "input": 1, "data": 1, "validationerror": 1, "pydantic_cor": 1, "cannot": 1, "form": 1, "self": 1, "explicitli": 1, "posit": 1, "onli": 1, "allow": 1, "url": 1, "union": 1, "list": 1, "path": 1, "bool": 1, "logopt": 1, "classmethod": 1, "from_env_fil": 1, "filenam": 1, "instanc": 1, "variabl": 1, "ar": 1, "addit": 1, "featur": 1, "both": 1, "system": 1, "session": 1, "return": 1, "parse_field": 1, "remov": 1, "parse_clone_dir": 1, "check": 1, "potenti": 1, "env_prefix": 1, "extra": 1, "model_computed_field": 1, "classvar": 1, "computedfieldinfo": 1, "A": 1, "comput": 1, "correspond": 1, "index": 1, "modul": 1, "search": 1}, "objects": {"git2s3.config": [[1, 0, 1, "", "EnvConfig"]], "git2s3.config.EnvConfig": [[1, 0, 1, "", "Config"], [1, 1, 1, "", "aws_access_key_id"], [1, 1, 1, "", "aws_bucket_name"], [1, 1, 1, "", "aws_profile_name"], [1, 1, 1, "", "aws_region_name"], [1, 1, 1, "", "aws_secret_access_key"], [1, 1, 1, "", "clone_dir"], [1, 1, 1, "", "debug"], [1, 1, 1, "", "fields"], [1, 2, 1, "", "from_env_file"], [1, 1, 1, "", "gist_owner"], [1, 1, 1, "", "git_api_url"], [1, 1, 1, "", "git_owner"], [1, 1, 1, "", "git_token"], [1, 1, 1, "", "log"], [1, 1, 1, "", "model_computed_fields"], [1, 2, 1, "", "parse_clone_dir"], [1, 2, 1, "", "parse_fields"]], "git2s3.config.EnvConfig.Config": [[1, 1, 1, "", "env_file"], [1, 1, 1, "", "env_prefix"], [1, 1, 1, "", "extra"]], "git2s3": [[1, 3, 0, "-", "main"]], "git2s3.main": [[1, 0, 1, "", "Git2S3"]], "git2s3.main.Git2S3": [[1, 2, 1, "", "clone_wiki"], [1, 2, 1, "", "cloner"], [1, 2, 1, "", "get_all"], [1, 2, 1, "", "start"], [1, 2, 1, "", "worker"]]}, "objtypes": {"0": "py:class", "1": "py:attribute", "2": "py:method", "3": "py:module"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "attribute", "Python attribute"], "2": ["py", "method", "Python method"], "3": ["py", "module", "Python module"]}, "titleterms": {"git2s3": [0, 1], "kick": 0, "off": 0, "code": 0, "standard": 0, "releas": 0, "note": 0, "lint": 0, "pypi": 0, "packag": 0, "runbook": 0, "licens": 0, "copyright": 0, "welcom": 1, "": 1, "document": 1, "content": 1, "main": 1, "configur": 1, "indic": 1, "tabl": 1}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 56}})
\ No newline at end of file
+Search.setIndex({"docnames": ["README", "index"], "filenames": ["README.md", "index.rst"], "titles": ["Git2S3", "Welcome to Git2S3\u2019s documentation!"], "terms": {"backup": 0, "github": [0, 1], "project": 0, "aw": 0, "s3": [0, 1], "platform": 0, "support": 0, "deploy": 0, "recommend": 0, "instal": 0, "python": 0, "3": 0, "10": 0, "11": 0, "us": [0, 1], "dedic": 0, "virtual": 0, "m": 0, "pip": 0, "initi": 0, "id": 0, "import": 0, "__name__": 0, "__main__": 0, "git": 0, "start": [0, 1], "cli": 0, "help": 0, "usag": 0, "instruct": 0, "sourc": 0, "from": [0, 1], "an": [0, 1], "env": [0, 1], "file": [0, 1], "can": 0, "ani": 0, "plaintext": 0, "json": [0, 1], "yaml": 0, "The": 0, "filepath": 0, "should": 0, "provid": 0, "argument": [0, 1], "dure": 0, "object": [0, 1], "instanti": [0, 1], "By": 0, "default": [0, 1], "look": 0, "current": 0, "work": 0, "directori": 0, "exampl": 0, "path": [0, 1], "env_fil": [0, 1], "_": 0, "api": 0, "url": [0, 1], "endpoint": 0, "http": [0, 1], "com": [0, 1], "owner": [0, 1], "profil": [0, 1], "organ": [0, 1], "name": [0, 1], "token": 0, "get": [0, 1], "all": [0, 1], "repo": [0, 1], "includ": 0, "privat": [0, 1], "field": [0, 1], "option": [0, 1], "restor": 0, "log": [0, 1], "stdout": [0, 1], "doe": 0, "appli": 0, "when": 0, "custom": 0, "logger": [0, 1], "i": 0, "debug": [0, 1], "boolean": 0, "flag": 0, "enabl": 0, "level": 0, "config": [0, 1], "valu": [0, 1], "aws_default_profil": 0, "access": 0, "kei": 0, "aws_access_key_id": [0, 1], "secret": 0, "aws_secret_access_kei": [0, 1], "region": 0, "bucket": 0, "": 0, "aws_default_region": 0, "store": [0, 1], "docstr": 0, "format": 0, "googl": 0, "style": 0, "convent": 0, "pep": 0, "8": 0, "isort": 0, "requir": 0, "gitvers": 0, "revers": 0, "f": 0, "release_not": 0, "rst": 0, "t": 0, "pre": 0, "commit": 0, "ensur": 0, "run": 0, "pytest": 0, "gener": [0, 1], "valid": [0, 1], "hyperlink": 0, "markdown": 0, "wiki": [0, 1], "page": [0, 1], "sphinx": 0, "5": 0, "1": 0, "recommonmark": 0, "org": [0, 1], "thevickypedia": 0, "io": 0, "vignesh": 0, "rao": 0, "under": 0, "mit": 0, "kick": 1, "off": 1, "environ": 1, "variabl": 1, "code": 1, "standard": 1, "releas": 1, "note": 1, "lint": 1, "pypi": 1, "packag": 1, "runbook": 1, "licens": 1, "copyright": 1, "class": 1, "str": 1, "o": 1, "pathlik": 1, "none": 1, "max_per_pag": 1, "int": 1, "100": 1, "clone": 1, "gist": 1, "upload": 1, "keyword": 1, "bring": 1, "your": 1, "own": 1, "maximum": 1, "number": 1, "fetch": 1, "per": 1, "profile_typ": 1, "type": 1, "return": 1, "get_al": 1, "dict": 1, "iter": 1, "through": 1, "target": 1, "avail": 1, "repositori": 1, "paramet": 1, "yield": 1, "dictionari": 1, "each": 1, "inform": 1, "clone_wiki": 1, "model": 1, "worker": 1, "payload": 1, "rais": 1, "except": 1, "If": 1, "thread": 1, "fail": 1, "cloner": 1, "func": 1, "callabl": 1, "concurr": 1, "function": 1, "refer": 1, "commun": 1, "discuss": 1, "44515": 1, "process": 1, "env_load": 1, "filenam": 1, "envconfig": 1, "load": 1, "base": 1, "filetyp": 1, "where": 1, "var": 1, "have": 1, "field_detector": 1, "detect": 1, "default_logg": 1, "consol": 1, "basemodel": 1, "clone_url": 1, "descript": 1, "bool": 1, "model_computed_field": 1, "classvar": 1, "computedfieldinfo": 1, "A": 1, "comput": 1, "correspond": 1, "baseset": 1, "pydant": 1, "git_api_url": 1, "git_own": 1, "git_token": 1, "union": 1, "list": 1, "logopt": 1, "aws_profile_nam": 1, "aws_region_nam": 1, "aws_bucket_nam": 1, "classmethod": 1, "from_env_fil": 1, "creat": 1, "instanc": 1, "ar": 1, "addit": 1, "featur": 1, "both": 1, "system": 1, "session": 1, "parse_field": 1, "pars": 1, "remov": 1, "parse_git_api_url": 1, "strip": 1, "end": 1, "env_prefix": 1, "extra": 1, "allow": 1, "hide_input_in_error": 1, "true": 1, "strenum": 1, "index": 1, "modul": 1, "search": 1}, "objects": {"git2s3.config": [[1, 0, 1, "", "EnvConfig"], [1, 0, 1, "", "Field"], [1, 0, 1, "", "Fields"], [1, 0, 1, "", "LogOptions"]], "git2s3.config.EnvConfig": [[1, 0, 1, "", "Config"], [1, 1, 1, "", "aws_access_key_id"], [1, 1, 1, "", "aws_bucket_name"], [1, 1, 1, "", "aws_profile_name"], [1, 1, 1, "", "aws_region_name"], [1, 1, 1, "", "aws_secret_access_key"], [1, 1, 1, "", "debug"], [1, 1, 1, "", "fields"], [1, 2, 1, "", "from_env_file"], [1, 1, 1, "", "git_api_url"], [1, 1, 1, "", "git_owner"], [1, 1, 1, "", "git_token"], [1, 1, 1, "", "log"], [1, 2, 1, "", "parse_fields"], [1, 2, 1, "", "parse_git_api_url"]], "git2s3.config.EnvConfig.Config": [[1, 1, 1, "", "env_prefix"], [1, 1, 1, "", "extra"], [1, 1, 1, "", "hide_input_in_errors"]], "git2s3.config.Field": [[1, 1, 1, "", "clone_url"], [1, 1, 1, "", "description"], [1, 1, 1, "", "field"], [1, 1, 1, "", "model_computed_fields"], [1, 1, 1, "", "name"], [1, 1, 1, "", "private"]], "git2s3.config.Fields": [[1, 1, 1, "", "all"], [1, 1, 1, "", "gist"], [1, 1, 1, "", "repo"], [1, 1, 1, "", "wiki"]], "git2s3.config.LogOptions": [[1, 1, 1, "", "file"], [1, 1, 1, "", "stdout"]], "git2s3": [[1, 3, 0, "-", "main"], [1, 3, 0, "-", "squire"]], "git2s3.main": [[1, 0, 1, "", "Git2S3"]], "git2s3.main.Git2S3": [[1, 2, 1, "", "clone_wiki"], [1, 2, 1, "", "cloner"], [1, 2, 1, "", "get_all"], [1, 2, 1, "", "profile_type"], [1, 2, 1, "", "start"], [1, 2, 1, "", "worker"]], "git2s3.squire": [[1, 4, 1, "", "default_logger"], [1, 4, 1, "", "env_loader"], [1, 4, 1, "", "field_detector"]]}, "objtypes": {"0": "py:class", "1": "py:attribute", "2": "py:method", "3": "py:module", "4": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "attribute", "Python attribute"], "2": ["py", "method", "Python method"], "3": ["py", "module", "Python module"], "4": ["py", "function", "Python function"]}, "titleterms": {"git2s3": [0, 1], "kick": 0, "off": 0, "environ": 0, "variabl": 0, "code": 0, "standard": 0, "releas": 0, "note": 0, "lint": 0, "pypi": 0, "packag": 0, "runbook": 0, "licens": 0, "copyright": 0, "welcom": 1, "": 1, "document": 1, "content": 1, "main": 1, "squir": 1, "configur": 1, "indic": 1, "tabl": 1}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 56}})
\ No newline at end of file
diff --git a/git2s3/config.py b/git2s3/config.py
index 9769ab5..5cbc4bb 100644
--- a/git2s3/config.py
+++ b/git2s3/config.py
@@ -1,9 +1,6 @@
-import os
import pathlib
-import shutil
import sys
-import warnings
-from typing import List
+from typing import List, Optional
from pydantic import BaseModel, DirectoryPath, HttpUrl, field_validator
from pydantic_settings import BaseSettings
@@ -51,7 +48,7 @@ class Field(BaseModel):
field: Fields
clone_url: HttpUrl
name: str
- description: str
+ description: Optional[str] = None
private: bool
@@ -61,32 +58,15 @@ class EnvConfig(BaseSettings):
>>> EnvConfig
- Notes:
- - **git_api_url** - GitHub API endpoint to fetch all repos within an organization or owner.
- - **git_owner** - GitHub owner or organization name.
- - **gist_owner** - GitHub owner name for gists.
- - **git_token** - GitHub token to get ALL repos (including private).
- - **fields** - Fields options to restore. Defaults to all.
- - **clone_dir** - Backup location to store the files locally.
- - **debug** - Boolean flag to enable debug level logging. Does not apply when custom logger is used.
- - **log** - Log options to log to a ``file`` or ``stdout``. Does not apply when custom logger is used.
- - **aws_profile_name** - AWS profile name. Uses the CLI config value ``AWS_DEFAULT_PROFILE`` by default.
- - **aws_access_key_id** - AWS access key ID. Uses the CLI config value ``AWS_ACCESS_KEY_ID`` by default.
- - **aws_secret_access_key** - AWS secret key. Uses the CLI config value ``AWS_SECRET_ACCESS_KEY`` by default.
- - **aws_region_name** - S3 bucket's region. Uses the CLI config value ``AWS_DEFAULT_REGION`` by default.
- - **aws_bucket_name** - AWS bucket name to store the backups.
"""
git_api_url: HttpUrl = "https://api.github.com/"
- git_owner: str | None = None
- gist_owner: str | None = None
- git_token: str | None = None
- fields: Fields | List[Fields] = Fields.all
- clone_dir: DirectoryPath = os.path.join(os.getcwd(), "backup")
- # todo: Add CLI option to pass path for the secrets file
+ git_owner: str
+ git_token: str
- debug: bool = False
+ fields: Fields | List[Fields] = Fields.all
log: LogOptions = LogOptions.stdout
+ debug: bool = False
aws_profile_name: str | None = None
aws_access_key_id: str | None = None
@@ -113,7 +93,7 @@ def from_env_file(cls, filename: pathlib.Path) -> "EnvConfig":
@field_validator("fields", mode="after", check_fields=True)
def parse_fields(cls, value: Fields | List[Fields]) -> DirectoryPath:
- """Validate and parse fields to remove all."""
+ """Validate and parse 'fields' to remove 'all' from the fields option."""
if isinstance(value, list):
if value == [Fields.all] or Fields.all in value:
return [Fields.repo, Fields.gist, Fields.wiki]
@@ -122,27 +102,14 @@ def parse_fields(cls, value: Fields | List[Fields]) -> DirectoryPath:
return [Fields.repo, Fields.gist, Fields.wiki]
raise ValueError(f"{value!r} is not a valid field type")
- @field_validator("clone_dir", mode="before", check_fields=True)
- def parse_clone_dir(cls, value: str) -> DirectoryPath:
- """Validate clone_dir to check if it is potential path."""
- if os.path.isdir(value):
- if os.listdir(value):
- # Re-creates the backup directory if it already exists and is not empty
- warnings.simplefilter("always", ResourceWarning)
- warnings.warn(
- f"Directory {value!r} is not empty, cleaning up...", ResourceWarning
- )
- shutil.rmtree(value)
- os.makedirs(value)
- return value
- if os.path.isdir(os.path.dirname(value)):
- os.makedirs(value)
- return value
- raise ValueError(f"{value!r} is neither a valid path, nor a potential path")
+ @field_validator("git_api_url", mode="after", check_fields=True)
+ def parse_git_api_url(cls, value: HttpUrl) -> str:
+ """Parse git_api_url stripping the ``/`` at the end."""
+ return str(value).rstrip("/")
class Config:
"""Environment variables configuration."""
- env_file = os.environ.get("env_file", ".env")
env_prefix = ""
extra = "allow"
+ hide_input_in_errors = True
diff --git a/git2s3/main.py b/git2s3/main.py
index a48433b..95a5c4d 100644
--- a/git2s3/main.py
+++ b/git2s3/main.py
@@ -1,8 +1,10 @@
import logging
import multiprocessing
import os
+import secrets
import shutil
import threading
+import warnings
from collections.abc import Generator
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Callable, Dict
@@ -14,11 +16,16 @@
from git2s3 import config, squire
+# noinspection PyUnresolvedReferences
class Git2S3:
"""Instantiates Git2S3 object to clone all repos/wiki/gists from GitHub and upload to S3.
>>> Git2S3
+ Keyword Args:
+ env_file: Environment configuration.
+ logger: Bring your own logger object.
+ max_per_page: Maximum number of repos to fetch per page.
"""
def __init__(
@@ -27,19 +34,12 @@ def __init__(
logger: logging.Logger = None,
max_per_page: int = 100,
):
- """Loads all the necessary args, creates a connection to GitHub API and S3 to perform the backup.
-
- Keyword Args:
- env_file: Environment configuration.
- logger: Bring your own logger object.
- max_per_page: Maximum number of repos to fetch per page.
- """
+ """Instantiates Git2S3 object to clone all repos/wiki/gists from GitHub and upload to S3."""
assert 1 <= max_per_page <= 100, "'max_per_page' must be between 1 and 100"
self.per_page = max_per_page
self.src_logger = logger
self.env = squire.env_loader(env_file)
self.logger = self.src_logger or squire.default_logger(self.env)
- self.repo = git.Repo()
self.session = requests.Session()
self.session.headers = {
"Accept": "application/vnd.github+json",
@@ -47,6 +47,44 @@ def __init__(
"X-GitHub-Api-Version": "2022-11-28",
"Content-Type": "application/x-www-form-urlencoded",
}
+ self.repo = git.Repo()
+ self.clone_dir = os.path.join(os.getcwd(), self.env.git_owner)
+ profile = self.profile_type()
+ if profile == "orgs":
+ if config.Fields.gist in self.env.fields:
+ warnings.warn(
+ f"Gists are not supported for organizations. Removing {config.Fields.gist!r} from the fields.",
+ UserWarning,
+ )
+ self.env.fields.remove(config.Fields.gist)
+ self.base_url = f"{self.env.git_api_url}/{profile}/{self.env.git_owner}"
+
+ def profile_type(self) -> str:
+ """Get the profile type.
+
+ Returns:
+ str:
+ Returns the profile type.
+ """
+ try:
+ response = self.session.get(
+ f"{self.env.git_api_url}/orgs/{self.env.git_owner}"
+ )
+ assert response.ok
+ return "orgs"
+ except (requests.RequestException, AssertionError):
+ pass
+ try:
+ response = self.session.get(
+ f"{self.env.git_api_url}/users/{self.env.git_owner}"
+ )
+ assert response.ok
+ return "users"
+ except (requests.RequestException, AssertionError):
+ pass
+ raise Exception(
+ f"Failed to get the profile type for {self.env.git_owner}. Please check the owner/organization name."
+ )
def get_all(self, field: config.Fields) -> Generator[Dict[str, str]]:
"""Iterate through a target owner/organization to get all available repositories/gists.
@@ -58,13 +96,12 @@ def get_all(self, field: config.Fields) -> Generator[Dict[str, str]]:
Generator[Dict[str, str]]:
Yields a dictionary of each repo's information.
"""
- if not self.env.gist_owner:
- self.env.gist_owner = self.env.git_owner
if field == config.Fields.repo:
- base_url = f"{self.env.git_api_url}orgs/{self.env.git_owner}/repos"
+ endpoint = f"{self.base_url}/repos"
elif field == config.Fields.gist:
- base_url = f"{self.env.git_api_url}users/{self.env.gist_owner}/gists"
+ endpoint = f"{self.base_url}/gists"
else:
+ # This won't occur programmatically, but here just in case
raise ValueError(
f"Invalid field type. Please choose from {config.Fields.repo.value!r} or {config.Fields.gist.value!r}"
)
@@ -73,7 +110,7 @@ def get_all(self, field: config.Fields) -> Generator[Dict[str, str]]:
self.logger.debug("Fetching repos from page %d", idx)
try:
response = self.session.get(
- url=f"{base_url}?per_page={self.per_page}&page={idx}"
+ url=endpoint, params={"per_page": self.per_page, "page": idx}
)
assert response.ok, response.text
except (requests.RequestException, AssertionError) as error:
@@ -84,6 +121,7 @@ def get_all(self, field: config.Fields) -> Generator[Dict[str, str]]:
self.logger.debug(
"Repositories in page %d: %d", idx, len(json_response)
)
+ # Yields dictionary from a list
yield from json_response
idx += 1
else:
@@ -101,11 +139,11 @@ def clone_wiki(self, field: config.Field) -> None:
wiki_url = str(field.clone_url).replace(".git", ".wiki.git")
if field.private:
wiki_dest = str(
- os.path.join(self.env.clone_dir, field.field, "private", field.name)
+ os.path.join(self.clone_dir, field.field, "private", field.name)
)
else:
wiki_dest = str(
- os.path.join(self.env.clone_dir, field.field, "public", field.name)
+ os.path.join(self.clone_dir, field.field, "public", field.name)
)
if not os.path.isdir(wiki_dest):
os.makedirs(wiki_dest)
@@ -128,18 +166,14 @@ def worker(self, repo: Dict[str, str]) -> None:
If the thread fails to clone the repository.
"""
target = squire.field_detector(repo, self.env)
- self.logger.info("Cloning %s", target.name)
+ self.logger.info("Cloning %s: %s", target.field, target.name)
if target.private:
repo_dest = str(
- os.path.join(
- self.env.clone_dir, target.field.value, "private", target.name
- )
+ os.path.join(self.clone_dir, target.field.value, "private", target.name)
)
else:
repo_dest = str(
- os.path.join(
- self.env.clone_dir, target.field.value, "public", target.name
- )
+ os.path.join(self.clone_dir, target.field.value, "public", target.name)
)
# only repos have this field anyway
if config.Fields.wiki in self.env.fields and repo.get("has_wiki"):
@@ -151,9 +185,17 @@ def worker(self, repo: Dict[str, str]) -> None:
os.makedirs(repo_dest)
try:
self.repo.clone_from(target.clone_url, repo_dest)
- with open(os.path.join(repo_dest, "description.txt"), "w") as desc:
- desc.write(target.description)
- desc.flush()
+ try:
+ if target.description:
+ desc_file = os.path.join(
+ repo_dest, f"description_{secrets.token_hex(2)}.txt"
+ )
+ with open(desc_file, "w") as desc:
+ desc.write(target.description)
+ desc.flush()
+ except Exception as warning:
+ # Adding description file is only an added feature, so no need to fail
+ self.logger.warning(warning)
shutil.make_archive(repo_dest, "zip", repo_dest)
if os.path.isfile(f"{repo_dest}.zip"):
shutil.rmtree(repo_dest)
@@ -183,11 +225,12 @@ def cloner(self, func: Callable, field: str) -> None:
with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
for repo in func(field):
future = executor.submit(self.worker, repo)
- futures[future] = repo.get("name")
+ futures[future] = repo.get("name") or repo.get("id")
for future in as_completed(futures):
if future.exception():
self.logger.error(
- "Thread processing for '%s' received an exception: %s",
+ "Thread cloning the %s '%s' received an exception: %s",
+ field,
futures[future],
future.exception(),
)
@@ -195,6 +238,7 @@ def cloner(self, func: Callable, field: str) -> None:
def start(self) -> None:
"""Start the cloning process."""
self.logger.info("Starting cloning process...")
+ # Both processes run concurrently, calling the same function with different arguments
processes = [
multiprocessing.Process(
target=self.cloner,
diff --git a/git2s3/squire.py b/git2s3/squire.py
index bf8b339..0aa5865 100644
--- a/git2s3/squire.py
+++ b/git2s3/squire.py
@@ -52,7 +52,7 @@ def field_detector(repo: Dict[str, str], env: EnvConfig) -> Field:
Field:
Field model.
"""
- if repo.get("comments_url") == f"{env.git_api_url}gists/{repo['id']}/comments":
+ if repo.get("comments_url") == f"{env.git_api_url}/gists/{repo['id']}/comments":
return Field(
field=Fields.gist,
clone_url=repo["git_pull_url"],
|