Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
alexpdev committed Jun 17, 2024
1 parent eb71563 commit 17e5092
Show file tree
Hide file tree
Showing 13 changed files with 255 additions and 1,619 deletions.
25 changes: 0 additions & 25 deletions .editorconfig

This file was deleted.

Empty file removed MANIFEST.in
Empty file.
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
version = "0.1.2"

clean : ## Remove residual files
rm -rfv **/__pycache__
rm -rfv *.egg-info
Expand Down
54 changes: 52 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
![EbookAtty](./ebookatty.png)

# ebookatty

![License](https://img.shields.io/badge/License-LGPL-blue?style=for-the-badge&logo=appveyor)
Expand Down Expand Up @@ -45,10 +47,58 @@ from ebookatty import MetadataFetcher
GNU LGPL v3.0
[LICENSE FILE](./LICENSE.md)


## Usage

### Example Use
### Example API Use

__example 1__
```py
>>> from pprint import pprint
>>> from ebookatty import fetch_metadata
>>> file_path = "Romeo and Juliet - William Shakespeare.mobi"
>>> data = fetch_metadata(file_path)
>>> pprint(data)
{'author': 'William Shakespeare',
'codec': 'utf-8',
'codepage': '65001',
'contributor': 'calibre (0.7.23) [http://calibre-ebook.com]',
'description': 'SUMMARY: These violent delights have violent ends And in '
'their triumph die, like fire and powder, Which, as they kiss, '
'consume. When Romeo first lays eyes on the bewitching Juliet, '
"it's love at first sight. But though their love runs true and "
'deep, it is also completely forbidden. With family and fate '
'determined to keep them apart, will Romeo and Juliet find a '
"way to be together? William Shakespeare's masterpiece is one "
'of the most enduring stories of star-crossed love of all '
'time. Beautifully presented for a modern teen audience with '
'both the original play and a prose retelling of the beloved '
'story, this is the must-have edition of a timeless classic.',
'doctype': 'EXTH; MOBI',
'filetype': '.mobi',
'identity': 'BOOKMOBI',
'isbn': '9780061965494',
'langid': '9',
'name': 'Romeo and Juliet - William Shakespeare',
'published': '2009-08-15T07:00:00+00:00',
'publisher': 'HarperCollins',
'rights': 'NONE',
'subject': 'William; Renaissance; Vendetta; Juvenile Nonfiction; Historical; '
"Children's Books; Man-woman relationships; Historical - "
"Renaissance; Love & Romance; Playscripts (Children's; Shakespeare "
'plays; Shakespeare; Irish; 1564-1616; YA); General; Young Adult '
'Graphic Novels; Classics; Young Adult General Interest & Leisure; '
"Children's Books - Young Adult Fiction; Welsh; Juliet (Fictitious "
'character); Children: Young Adult (Gr. 7-9); Conflict of '
'generations; Verona (Italy); Romeo (Fictitious character); '
'Juvenile Fiction; Scottish; Drama; English',
'title': 'Romeo and Juliet',
'type': '2',
'unique_id': '1974853891',
'updatedtitle': 'Romeo and Juliet',
'version': '0; 6'}
```

### Example CLI Use

__example 1__
```bash
Expand Down
1,421 changes: 0 additions & 1,421 deletions a.txt

This file was deleted.

Binary file added ebookatty.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions ebookatty/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
#########################################################################
"""__init__ module for application."""

from ebookatty.metadata import MetadataFetcher
from ebookatty.metadata import MetadataFetcher, fetch_metadata
from ebookatty.cli import execute

__version__ = "0.3.1"

__all__ = ["MetadataFetcher", "execute"]
__all__ = ["MetadataFetcher", "execute", "fetch_metadata"]
28 changes: 20 additions & 8 deletions ebookatty/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,17 @@
#########################################################################
"""Utility functions and methods."""

from glob import glob
from pathlib import Path
import argparse
import json
import sys
import argparse
import csv
from glob import glob
from pathlib import Path
from typing import List

from ebookatty import MetadataFetcher

def find_matches(files: list) -> list:

def find_matches(files: List[str]) -> List[str]:
"""
Search list and find matching file paths that fit patterns.
Expand All @@ -55,8 +56,17 @@ def execute():
This is the application's main entry point and CLI implementation.
"""
parser = argparse.ArgumentParser(description="get ebook metadata", prefix_chars="-")
parser.add_argument('file', help='path to ebook file(s), standard file pattern extensions are allowed.', nargs=1)
parser.add_argument('-o', '--output', help='file path where metadata will be written. Acceptable formats include json and csv and are determined based on the file extension. Default is None', action="store")
parser.add_argument(
"file",
help="path to ebook file(s), standard file pattern extensions are allowed.",
nargs=1,
)
parser.add_argument(
"-o",
"--output",
help="file path where metadata will be written. Acceptable formats include json and csv and are determined based on the file extension. Default is None",
action="store",
)
if len(sys.argv[1:]) == 0:
sys.argv.append("-h")
args = parser.parse_args(sys.argv[1:])
Expand All @@ -67,10 +77,12 @@ def execute():
fetcher = MetadataFetcher(match)
data = fetcher.get_metadata()
datas.append(data)
if not args.output:
fetcher.show_metadata()
if args.output:
path = Path(args.output)
if path.suffix == ".json":
json.dump(datas, open(path,"wt"))
json.dump(datas, open(path, "wt"))
elif path.suffix == ".csv":
d = set()
for row in datas:
Expand Down
25 changes: 14 additions & 11 deletions ebookatty/epub.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import zipfile
from pathlib import Path
from xml.etree import ElementTree as ET

from ebookatty.standards import OPF_TAGS


Expand Down Expand Up @@ -51,7 +52,7 @@ def __init__(self, path: str):
meta = self.iterer(root)
for key, val in meta.items():
if val:
val = '; '.join([str(i) for i in set(val)])
val = "; ".join([str(i) for i in set(val)])
if val == "en":
val = "English"
meta[key] = val
Expand All @@ -77,7 +78,7 @@ def iterer(self, root: ET.Element) -> dict:
dict
all metadata extracted from element and its children
"""
pattern = re.compile(r'\{.*\}(\w+)')
pattern = re.compile(r"\{.*\}(\w+)")
match = pattern.findall(root.tag)[0]
if match in self.tags and root.text not in [None, "None", "NONE"]:
meta = {match: [root.text]}
Expand All @@ -86,8 +87,8 @@ def iterer(self, root: ET.Element) -> dict:
for element in root:
if element != root:
data = self.iterer(element)
for k,v in data.items():
meta.setdefault(k,[])
for k, v in data.items():
meta.setdefault(k, [])
meta[k].extend(v)
return meta

Expand All @@ -102,13 +103,15 @@ def get_opf(self) -> str:
str
the absolute path to the opf file contained in the ziparchive
"""
ns = {'n': 'urn:oasis:names:tc:opendocument:xmlns:container',
'pkg': 'http://www.idpf.org/2007/opf',
'dc': 'http://purl.org/dc/elements/1.1/'}
txt = self.epub_zip.read('META-INF/container.xml')
ns = {
"n": "urn:oasis:names:tc:opendocument:xmlns:container",
"pkg": "http://www.idpf.org/2007/opf",
"dc": "http://purl.org/dc/elements/1.1/",
}
txt = self.epub_zip.read("META-INF/container.xml")
tree = ET.fromstring(txt)
elems = tree.findall('n:rootfiles/n:rootfile', namespaces=ns)
elems = tree.findall("n:rootfiles/n:rootfile", namespaces=ns)
for elem in elems:
if 'full-path' in elem.attrib:
return elem.attrib['full-path']
if "full-path" in elem.attrib:
return elem.attrib["full-path"]
return None
59 changes: 50 additions & 9 deletions ebookatty/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,12 @@
Classes and functions for .azw, .azw3, and .kfx ebooks.
"""
from pathlib import Path
import shutil
from ebookatty import mobi, epub, standards
from pathlib import Path
from typing import Dict, Generator, Union

from ebookatty import epub, mobi, standards


class MetadataFetcher:
"""Primary Entrypoint for extracting metadata from most ebook filetypes."""
Expand All @@ -44,9 +47,9 @@ def __init__(self, path: str):
elif self.path.suffix in [".azw3", "azw", "kfx", ".mobi"]:
self.meta = mobi.Kindle(self.path)
else:
self.meta = {}
self.meta = mobi.Kindle(self.path)

def get_metadata(self) -> dict:
def show_metadata(self) -> Dict[str, str]:
"""
Call to start the extraction process.
Expand All @@ -63,6 +66,43 @@ def get_metadata(self) -> dict:
return self.metadata
return {}

def get_metadata(self) -> Dict[str, str]:
    """Retrieve metadata from ebook.

    Returns
    -------
    Dict[str, str]
        the ``metadata`` attribute of the parser object held in ``self.meta``
    """
    return self.meta.metadata


def fetch_metadata(path: Union[str, Path]) -> Union[Dict[str, str], None]:
    """Retrieve metadata for the ebook located at the supplied file path.

    Parameters
    ----------
    path : Union[str, Path]
        file path of the ebook.

    Returns
    -------
    Union[Dict[str, str], None]
        Ebook metadata available, or None when the metadata could not be
        extracted (any exception raised while parsing is swallowed).
    """
    path = Path(path)
    try:
        # Path.suffix always includes the leading dot. Every non-epub file is
        # handed to the Kindle parser, which is what the former elif/else
        # branches both did.
        if path.suffix.lower() == ".epub":
            meta = epub.Epub(path)
        else:
            meta = mobi.Kindle(path)
        return meta.metadata
    except Exception:
        # Best-effort API: a parsing failure is reported to the caller as
        # None instead of being raised.
        return None


def format_output(book: dict) -> str:
"""
Format the output for printing to STDOUT.
Expand Down Expand Up @@ -104,13 +144,14 @@ def format_output(book: dict) -> str:
text += extra + section + "\n"
output.append(text)
output = sorted(output, key=len)
output.insert(0,"\n" +("-" * long_line))
output.insert(0, "\n" + ("-" * long_line))
output.append(("-" * long_line) + "\n")
final = "\n".join(output)
print(final)
return output

def text_sections(section_size: int, text: str) -> Generator[str, None, None]:
    """
    Split large text sections into smaller portions and yield result.

    Each section is cut at the last space found at or before index
    ``section_size``; the space itself is dropped. When no such space exists
    the text is split at exactly ``section_size`` characters.

    Parameters
    ----------
    section_size : int
        highest index at which a section may be cut; must be at least 1.
    text : str
        the text to divide.

    Yields
    ------
    str
        the next section of the divided text.

    Raises
    ------
    ValueError
        if ``section_size`` is less than 1 (raised when iteration starts).
    """
    if section_size < 1:
        raise ValueError("section_size must be at least 1")
    while len(text) > section_size:
        # rfind never wraps around to negative indices, unlike a manual
        # backwards scan, so a long run without spaces cannot mis-cut or
        # raise IndexError.
        cut = text.rfind(" ", 0, section_size + 1)
        if cut == -1:
            # No space to break on: hard split so the loop still advances.
            yield text[:section_size]
            text = text[section_size:]
        else:
            yield text[:cut]
            text = text[cut + 1 :]
    yield text
Loading

0 comments on commit 17e5092

Please sign in to comment.