From 08a06d453cfa8c10c1b11c0fb54c99be1a2dffb4 Mon Sep 17 00:00:00 2001
From: Sebastian Utz
Date: Fri, 24 May 2024 14:06:36 +0200
Subject: [PATCH] cfr: Add support to export systables to a tarfile

---
Review notes (not part of the commit message):
- `save` stages archive exports inside a `with tempfile.TemporaryDirectory()`
  block, so the staging directory is removed even when the export fails.
- `make_tarfile` creates missing parent folders of the archive, like the
  directory export does for its target.

 cratedb_toolkit/cfr/cli.py      |  4 ++--
 cratedb_toolkit/cfr/systable.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/cratedb_toolkit/cfr/cli.py b/cratedb_toolkit/cfr/cli.py
index 4aab7450..15dccfd7 100644
--- a/cratedb_toolkit/cfr/cli.py
+++ b/cratedb_toolkit/cfr/cli.py
@@ -46,8 +46,8 @@ def cli(ctx: click.Context, cratedb_sqlalchemy_url: str, verbose: bool, debug: b
 def sys_export(ctx: click.Context, target: str):
     cratedb_sqlalchemy_url = ctx.meta["cratedb_sqlalchemy_url"]
     try:
-        stc = SystemTableExporter(dburi=cratedb_sqlalchemy_url, target=path_from_url(target))
-        path = stc.save()
+        stc = SystemTableExporter(dburi=cratedb_sqlalchemy_url)
+        path = stc.save(path_from_url(target))
         jd({"path": str(path)})
     except Exception as ex:
         error_logger(ctx)(ex)
diff --git a/cratedb_toolkit/cfr/systable.py b/cratedb_toolkit/cfr/systable.py
index ad9f1f0f..ae5f055f 100644
--- a/cratedb_toolkit/cfr/systable.py
+++ b/cratedb_toolkit/cfr/systable.py
@@ -17,6 +17,9 @@
 
 import datetime as dt
 import logging
+import os
+import tarfile
+import tempfile
 import typing as t
 from pathlib import Path
 
@@ -92,14 +95,12 @@ class SystemTableExporter:
     Export schema and data from CrateDB system tables.
     """
 
-    def __init__(self, dburi: str, target: t.Union[Path], data_format: DataFormat = "jsonl"):
+    def __init__(self, dburi: str, data_format: DataFormat = "jsonl"):
         self.dburi = dburi
-        self.target = target
         self.data_format = data_format
         self.adapter = DatabaseAdapter(dburi=self.dburi)
         self.info = InfoContainer(adapter=self.adapter)
         self.inspector = SystemTableInspector(dburi=self.dburi)
-        self.target.mkdir(exist_ok=True, parents=True)
 
     def read_table(self, tablename: str) -> pl.DataFrame:
         sql = f'SELECT * FROM "{SystemTableKnowledge.SYS_SCHEMA}"."{tablename}"'  # noqa: S608
@@ -122,9 +123,32 @@ def dump_table(self, frame: pl.DataFrame, file: t.Union[t.TextIO, None] = None):
         else:
             raise NotImplementedError(f"Output format not implemented: {self.data_format}")
 
-    def save(self) -> Path:
+    def save(self, target: Path) -> Path:
+        """
+        Export all system tables to `target`.
+
+        When `target` is named like a gzipped tarball (`.tgz` or `.tar.gz`),
+        the export is staged in a temporary directory and packed into that
+        archive; otherwise `target` is used as the output directory.
+
+        :param target: Archive file or output directory.
+        :return: Path of the archive, or of the export's `sys` directory.
+        """
+        if target.name.endswith((".tgz", ".tar.gz")):
+            # The context manager removes the staging area even if the export fails.
+            with tempfile.TemporaryDirectory() as temp_dir:
+                staging = Path(temp_dir)
+                self.export(staging)
+                self.make_tarfile(staging, target)
+            logger.info(f"Created archive file {target}")
+            return target
+
+        target.mkdir(exist_ok=True, parents=True)
+        return self.export(target)
+
+    def export(self, target_folder: Path) -> Path:
         timestamp = dt.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
-        path = self.target / self.info.cluster_name / timestamp / "sys"
+        path = target_folder / self.info.cluster_name / timestamp / "sys"
         logger.info(f"Exporting system tables to: {path}")
         system_tables = self.inspector.table_names()
         path_schema = path / ExportSettings.SCHEMA_PATH
@@ -166,6 +190,23 @@ def save(self) -> Path:
         logger.info(f"Successfully exported {table_count} system tables")
         return path
 
+    @staticmethod
+    def make_tarfile(source_folder: Path, target_file_path: Path) -> Path:
+        """
+        Pack `source_folder` into a new gzip-compressed tar archive.
+
+        :param source_folder: Directory to archive; stored under its own base name.
+        :param target_file_path: Archive file to create.
+        :return: `target_file_path`.
+        :raises FileExistsError: If `target_file_path` already exists.
+        """
+        # Mirror the directory export, which also creates missing parent folders.
+        target_file_path.parent.mkdir(exist_ok=True, parents=True)
+        # Mode "x:gz" creates the archive exclusively, never overwriting a file.
+        with tarfile.open(target_file_path, "x:gz") as tar:
+            tar.add(source_folder.absolute(), arcname=os.path.basename(source_folder))
+        return target_file_path
+
 
 class SystemTableImporter:
     """