-
Notifications
You must be signed in to change notification settings - Fork 0
/
overview-upload
executable file
·72 lines (59 loc) · 3.08 KB
/
overview-upload
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3
#
# Upload a file, or directory, to an Overview server.
# From https://github.com/overview/overview-upload-directory
import argparse
import logging
import os
import pathlib
from overview_upload import Upload, create_document_set
# ---- Main ----
def main():
    """Parse command-line arguments and upload a file or directory to Overview.

    Options are read from ``sys.argv`` via argparse. If the given path does
    not exist, a message is printed and the function returns without making
    any upload calls. Otherwise it:

    1. optionally creates a new document set (when
       ``--create-document-set-with-title`` is given) and switches to the API
       token returned in that response;
    2. clears any previous upload;
    3. sends the directory, or the single file under its basename;
    4. finishes the upload with the chosen OCR, split-by-page and language
       options.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser(description='Upload a file or directory to an Overview server.')
    parser.add_argument('file', help='file or directory to upload')
    parser.add_argument('-t', '--token', help='API token corresponding to document set', required=True)
    parser.add_argument('-s', '--server', help='url of Overview server, defaults to http://localhost:9000', default='http://localhost:9000')
    # --skip-duplicate and -n/--noskip write to the same destination; the
    # default (True) is supplied by set_defaults() below.
    parser.add_argument('--skip-duplicate', dest='skip_duplicate', help='Skip files already on the server', action="store_true")
    parser.add_argument('-n', '--noskip', dest='skip_duplicate', help='Don\'t skip files already on server', action="store_false")
    parser.add_argument('--split-by-page', dest='split_by_page', help='Turn input files into one document per page', action="store_true", default=False)
    parser.add_argument('--lang', dest='lang', default='en', help='2-char ISO document language code (used for OCR and text analysis)')

    # --ocr and --no-ocr cannot be combined; both write to ``ocr``.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--ocr', dest='ocr', help='Run OCR on PDF pages that are only images', action="store_true")
    group.add_argument('--no-ocr', dest='ocr', help='Skip OCR always (for speed)', action="store_false")

    parser.add_argument('--create-document-set-with-title', dest='create_with_title', help='Create a new document set and then add files')
    parser.set_defaults(ocr=True, skip_duplicate=True)

    args = parser.parse_args()
    filename = args.file

    # Send all log records (DEBUG and up) from the uploader to stderr.
    logger = logging.getLogger("overview-upload")
    logger.setLevel(logging.DEBUG)
    logger.addHandler(logging.StreamHandler())

    # Guard clause: nothing to upload if the path is missing.
    if not os.path.exists(filename):
        print("Cannot find file or directory " + filename)
        return

    if args.create_with_title:
        response = create_document_set(args.server, args.token, args.create_with_title, logger=logger)
        # logging interpolates with the % operator, so the placeholders must be
        # %s; str.format-style "{}" would fail to format when the record is emitted.
        logger.info('Created document set "%s" with ID %s', args.create_with_title, response['documentSet']['id'])
        # Upload with the token returned for the newly created document set.
        api_token = response['apiToken']['token']
    else:
        api_token = args.token

    upload = Upload(args.server, api_token, logger=logger)
    upload.clear_previous_upload()

    upload_kwargs = {
        'skip_unhandled_extension': True,
        'skip_duplicate': args.skip_duplicate,
    }

    if os.path.isdir(filename):
        # Send a directory.
        upload.send_directory(filename, **upload_kwargs)
    else:
        # Send a single file.
        # use a basename on the server -- no directories
        path = pathlib.Path(filename)
        upload.send_path_if_conditions_met(path, filename=path.name, **upload_kwargs)

    upload.finish(
        ocr=args.ocr,
        split_by_page=args.split_by_page,
        lang=args.lang,
    )
# Run the uploader only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()