update

alexpdev · Jun 17, 2024 · 17e5092 · 17e5092
1 parent eb71563
commit 17e5092
Show file tree

Hide file tree

Showing 13 changed files with 255 additions and 1,619 deletions.
diff --git a/.editorconfig b/.editorconfig
diff --git a/MANIFEST.in b/MANIFEST.in
diff --git a/Makefile b/Makefile
@@ -1,3 +1,5 @@
+version = "0.1.2"
+
 clean : ## Remove residual files
 	rm -rfv **/__pycache__
 	rm -rfv *.egg-info

diff --git a/README.md b/README.md
@@ -1,3 +1,5 @@
+![EbookAtty](./ebookatty.png)
+
 # ebookatty
 
 ![License](https://img.shields.io/badge/License-LGPL-blue?style=for-the-badge&logo=appveyor)
@@ -45,10 +47,58 @@ from ebookatty import MetadataFetcher
 GNU LGPL v3.0
 [LICENSE FILE](./LICENSE.md)
 
-
 ## Usage
 
-### Example Use
+### Example API Use
+
+__example 1__
+```py
+>>> from pprint import pprint
+>>> from ebookatty import fetch_metadata
+>>> file_path = "Romeo and Juliet - William Shakespeare.mobi"
+>>> data = fetch_metadata(file_path)
+>>> pprint(data)
+{'author': 'William Shakespeare',
+ 'codec': 'utf-8',
+ 'codepage': '65001',
+ 'contributor': 'calibre (0.7.23) [http://calibre-ebook.com]',
+ 'description': 'SUMMARY: These violent delights have violent ends And in '
+                'their triumph die, like fire and powder, Which, as they kiss, '
+                'consume. When Romeo first lays eyes on the bewitching Juliet, '
+                "it's love at first sight. But though their love runs true and "
+                'deep, it is also completely forbidden. With family and fate '
+                'determined to keep them apart, will Romeo and Juliet find a '
+                "way to be together? William Shakespeare's masterpiece is one "
+                'of the most enduring stories of star-crossed love of all '
+                'time. Beautifully presented for a modern teen audience with '
+                'both the original play and a prose retelling of the beloved '
+                'story, this is the must-have edition of a timeless classic.',
+ 'doctype': 'EXTH; MOBI',
+ 'filetype': '.mobi',
+ 'identity': 'BOOKMOBI',
+ 'isbn': '9780061965494',
+ 'langid': '9',
+ 'name': 'Romeo and Juliet - William Shakespeare',
+ 'published': '2009-08-15T07:00:00+00:00',
+ 'publisher': 'HarperCollins',
+ 'rights': 'NONE',
+ 'subject': 'William; Renaissance; Vendetta; Juvenile Nonfiction; Historical; '
+            "Children's Books; Man-woman relationships; Historical - "
+            "Renaissance; Love & Romance; Playscripts (Children's; Shakespeare "
+            'plays; Shakespeare; Irish; 1564-1616; YA); General; Young Adult '
+            'Graphic Novels; Classics; Young Adult General Interest & Leisure; '
+            "Children's Books - Young Adult Fiction; Welsh; Juliet (Fictitious "
+            'character); Children: Young Adult (Gr. 7-9); Conflict of '
+            'generations; Verona (Italy); Romeo (Fictitious character); '
+            'Juvenile Fiction; Scottish; Drama; English',
+ 'title': 'Romeo and Juliet',
+ 'type': '2',
+ 'unique_id': '1974853891',
+ 'updatedtitle': 'Romeo and Juliet',
+ 'version': '0; 6'}
+```
+
+### Example CLI Use
 
 __example 1__
 ```bash

diff --git a/a.txt b/a.txt
diff --git a/ebookatty.png b/ebookatty.png
diff --git a/ebookatty/__init__.py b/ebookatty/__init__.py
@@ -21,9 +21,9 @@
 #########################################################################
 """__init__ module for application."""
 
-from ebookatty.metadata import MetadataFetcher
+from ebookatty.metadata import MetadataFetcher, fetch_metadata
 from ebookatty.cli import execute
 
 __version__ = "0.3.1"
 
-__all__ = ["MetadataFetcher", "execute"]
+__all__ = ["MetadataFetcher", "execute", "fetch_metadata"]
diff --git a/ebookatty/cli.py b/ebookatty/cli.py
@@ -19,16 +19,17 @@
 #########################################################################
 """Utility functions and methods."""
 
-from glob import glob
-from pathlib import Path
+import argparse
 import json
 import sys
-import argparse
-import csv
+from glob import glob
+from pathlib import Path
+from typing import List
 
 from ebookatty import MetadataFetcher
 
-def find_matches(files: list) -> list:
+
+def find_matches(files: List[str]) -> List[str]:
     """
     Search list and find matching file paths that fit patterns.
 
@@ -55,8 +56,17 @@ def execute():
     This is the applications main entrypoint and CLI implementation.
     """
     parser = argparse.ArgumentParser(description="get ebook metadata", prefix_chars="-")
-    parser.add_argument('file', help='path to ebook file(s), standard file pattern extensions are allowed.', nargs=1)
-    parser.add_argument('-o', '--output', help='file path where metadata will be written. Acceptable formats include json and csv and are determined based on the file extension. Default is None', action="store")
+    parser.add_argument(
+        "file",
+        help="path to ebook file(s), standard file pattern extensions are allowed.",
+        nargs=1,
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        help="file path where metadata will be written. Acceptable formats include json and csv and are determined based on the file extension. Default is None",
+        action="store",
+    )
     if len(sys.argv[1:]) == 0:
         sys.argv.append("-h")
     args = parser.parse_args(sys.argv[1:])
@@ -67,10 +77,12 @@ def execute():
         fetcher = MetadataFetcher(match)
         data = fetcher.get_metadata()
         datas.append(data)
+        if not args.output:
+            fetcher.show_metadata()
     if args.output:
         path = Path(args.output)
         if path.suffix == ".json":
-            json.dump(datas, open(path,"wt"))
+            json.dump(datas, open(path, "wt"))
         elif path.suffix == ".csv":
             d = set()
             for row in datas:

diff --git a/ebookatty/epub.py b/ebookatty/epub.py
@@ -23,6 +23,7 @@
 import zipfile
 from pathlib import Path
 from xml.etree import ElementTree as ET
+
 from ebookatty.standards import OPF_TAGS
 
 
@@ -51,7 +52,7 @@ def __init__(self, path: str):
         meta = self.iterer(root)
         for key, val in meta.items():
             if val:
-                val = '; '.join([str(i) for i in set(val)])
+                val = "; ".join([str(i) for i in set(val)])
                 if val == "en":
                     val = "English"
                 meta[key] = val
@@ -77,7 +78,7 @@ def iterer(self, root: ET.Element) -> dict:
         dict
             all metadata extracted from element and its children
         """
-        pattern = re.compile(r'\{.*\}(\w+)')
+        pattern = re.compile(r"\{.*\}(\w+)")
         match = pattern.findall(root.tag)[0]
         if match in self.tags and root.text not in [None, "None", "NONE"]:
             meta = {match: [root.text]}
@@ -86,8 +87,8 @@ def iterer(self, root: ET.Element) -> dict:
         for element in root:
             if element != root:
                 data = self.iterer(element)
-                for k,v in data.items():
-                    meta.setdefault(k,[])
+                for k, v in data.items():
+                    meta.setdefault(k, [])
                     meta[k].extend(v)
         return meta
 
@@ -102,13 +103,15 @@ def get_opf(self) -> str:
         str
             the absolute path to the opf file contained in the ziparchive
         """
-        ns = {'n': 'urn:oasis:names:tc:opendocument:xmlns:container',
-              'pkg': 'http://www.idpf.org/2007/opf',
-              'dc': 'http://purl.org/dc/elements/1.1/'}
-        txt = self.epub_zip.read('META-INF/container.xml')
+        ns = {
+            "n": "urn:oasis:names:tc:opendocument:xmlns:container",
+            "pkg": "http://www.idpf.org/2007/opf",
+            "dc": "http://purl.org/dc/elements/1.1/",
+        }
+        txt = self.epub_zip.read("META-INF/container.xml")
         tree = ET.fromstring(txt)
-        elems = tree.findall('n:rootfiles/n:rootfile', namespaces=ns)
+        elems = tree.findall("n:rootfiles/n:rootfile", namespaces=ns)
         for elem in elems:
-            if 'full-path' in elem.attrib:
-                return elem.attrib['full-path']
+            if "full-path" in elem.attrib:
+                return elem.attrib["full-path"]
         return None
diff --git a/ebookatty/metadata.py b/ebookatty/metadata.py
@@ -22,9 +22,12 @@
 
 Classes and functions for .azw, .azw3, and .kfx ebooks.
 """
-from pathlib import Path
 import shutil
-from ebookatty import mobi, epub, standards
+from pathlib import Path
+from typing import Dict, Generator, Union
+
+from ebookatty import epub, mobi, standards
+
 
 class MetadataFetcher:
     """Primary Entrypoint for extracting metadata from most ebook filetypes."""
@@ -44,9 +47,9 @@ def __init__(self, path: str):
         elif self.path.suffix in [".azw3", "azw", "kfx", ".mobi"]:
             self.meta = mobi.Kindle(self.path)
         else:
-            self.meta = {}
+            self.meta = mobi.Kindle(self.path)
 
-    def get_metadata(self) -> dict:
+    def show_metadata(self) -> Dict[str, str]:
         """
         Call to start the extraction process.
 
@@ -63,6 +66,43 @@ def get_metadata(self) -> dict:
                 return self.metadata
         return {}
 
+    def get_metadata(self) -> Dict[str, str]:
+        """Retrieve metadata from ebook.
+
+        Returns
+        -------
+        Dict[str, str]
+            metadata dictionary
+        """
+        return self.meta.metadata
+
+
+def fetch_metadata(path: Union[str, Path]) -> Union[Dict[str, str], None]:
+    """Retrieve metadata for ebook located at the supplied file path.
+
+    Parameters
+    ----------
+    path : Union[str, Path]
+        file path of the ebook.
+
+    Returns
+    -------
+    Union[Dict[str, str], None]
+        Ebook metadata available, or None if extraction raised an error.
+    """
+    path = Path(path)
+    try:
+        if path.suffix.lower() == ".epub":
+            meta = epub.Epub(path)
+        elif path.suffix.lower() in [".azw3", ".azw", ".kfx", ".mobi"]:
+            meta = mobi.Kindle(path)
+        else:
+            meta = mobi.Kindle(path)
+        return meta.metadata
+    except Exception:
+        return None
+
+
 def format_output(book: dict) -> str:
     """
     Format the output for printing to STDOUT.
@@ -104,13 +144,14 @@ def format_output(book: dict) -> str:
                 text += extra + section + "\n"
             output.append(text)
     output = sorted(output, key=len)
-    output.insert(0,"\n" +("-" * long_line))
+    output.insert(0, "\n" + ("-" * long_line))
     output.append(("-" * long_line) + "\n")
     final = "\n".join(output)
     print(final)
     return output
 
-def text_sections(section_size: int, text: str) -> str:
+
+def text_sections(section_size: int, text: str) -> Generator:
     """
     Split large text sections into smaller portions and yield result.
 
@@ -129,13 +170,13 @@ def text_sections(section_size: int, text: str) -> str:
 
     Yields
     ------
-    Iterator[str]
+    Generator[str]
         the next section of the divided text.
     """
     while len(text) > section_size:
         size = section_size
-        while text[size] != ' ':
+        while text[size] != " ":
             size -= 1
         yield text[:size]
-        text = text[size+1:]
+        text = text[size + 1 :]
     yield text