-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPDF_TO_DOC_Coverter.py
34 lines (29 loc) · 1.34 KB
/
PDF_TO_DOC_Coverter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from pdf2docx import parse
from pdf2docx import Converter
from typing import Tuple
class PdfWordConverter:
    """Convert one PDF file into a DOCX file using pdf2docx.

    Attributes:
        input_pdf_file: Path of the PDF to read, exactly as given by the caller.
        output_docx_file: Path of the DOCX to write, derived from the input
            path in ``__init__``.
    """

    def __init__(self, input_dpf_file):
        """Store the input path and derive the output path from it.

        Args:
            input_dpf_file: Path to the source PDF as a string. The spelling
                of the parameter name is kept so keyword callers keep working.
        """
        self.input_pdf_file = input_dpf_file
        # Strip the suffix only when it really is ".pdf" (any letter case).
        # Blindly dropping the last four characters would mangle a path that
        # has no ".pdf" suffix (e.g. "notes" -> "n.docx").
        if input_dpf_file[-4:].lower() == '.pdf':
            stem = input_dpf_file[:-4]
        else:
            stem = input_dpf_file
        self.output_docx_file = stem + '.docx'

    @staticmethod
    def _normalize_pages(pages):
        """Return the usable page numbers from *pages* as a list of ints.

        Ints are kept as they are. Strings are kept only when they consist
        solely of decimal digits, so ``int()`` cannot fail on them. Every
        other entry (including bools) is dropped silently.
        """
        selected = []
        for page in pages:
            if isinstance(page, bool):
                continue
            if isinstance(page, int):
                selected.append(page)
            elif isinstance(page, str) and page.isdecimal():
                selected.append(int(page))
        return selected

    def __convert_pdf2docx(self, input_file: str, output_file: str, pages: Tuple = None):
        """Convert a PDF to DOCX via ``pdf2docx.parse`` and print a summary.

        Args:
            input_file: Path of the PDF to read.
            output_file: Path of the DOCX to write.
            pages: Optional page numbers (ints or digit strings). A falsy
                value is forwarded to ``parse`` unchanged.

        Returns:
            Whatever ``parse`` returns.
        """
        if pages:
            pages = self._normalize_pages(pages)
        # NOTE(review): the keyword ``docx_with_path`` should be checked
        # against the installed pdf2docx version; newer releases appear to
        # name this parameter ``docx_file`` -- confirm before relying on it.
        result = parse(pdf_file=input_file,
                       docx_with_path=output_file, pages=pages)
        summary = {
            "File": input_file, "Pages": str(pages), "Output File": output_file
        }
        # Printing Summary
        print("## Summary ########################################################")
        print("\n".join("{}:{}".format(key, value) for key, value in summary.items()))
        print("###################################################################")
        return result

    def __newconverter(self, input_file: str, output_file: str):
        """Convert a PDF to DOCX via ``pdf2docx.Converter``.

        Args:
            input_file: Path of the PDF to read.
            output_file: Path of the DOCX to write.
        """
        cv = Converter(input_file)
        try:
            cv.convert(docx_filename=output_file)
        finally:
            # Close the converter even when the conversion raises, so the
            # opened PDF is not left dangling.
            cv.close()

    def convert_to_docx(self):
        """Convert ``input_pdf_file`` and return the path of the DOCX written.

        Returns:
            The value of ``output_docx_file``.
        """
        # The Converter-based path is the one in use; the parse()-based
        # helper above is kept as an alternative but is not called here.
        self.__newconverter(self.input_pdf_file, self.output_docx_file)
        return self.output_docx_file