-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocr_api.py
94 lines (87 loc) · 3.41 KB
/
ocr_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
'''
Author: Victor-kawai [email protected]
Date: 2024-04-11 18:31:28
LastEditors: Victor-kawai [email protected]
LastEditTime: 2024-05-22 11:47:09
FilePath: \毕设\code\ocr_api.py
Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
'''
import requests
import json
import os
# 本地文件
def ocr_space_file(filename, overlay=False, api_key='K82023489588957', language='chs', timeout=60):
    """Upload a local image to the OCR.space API and return the recognized text.

    :param filename: Path of the image file to upload.
    :param overlay: Value sent as ``isOverlayRequired``.
                    Defaults to False.
    :param api_key: OCR.space API key.
                    Defaults to the key embedded in the signature.
    :param language: Language code to be used in OCR.
                    List of available language codes can be found on https://ocr.space/OCRAPI
                    Defaults to 'chs'.
    :param timeout: Timeout in seconds handed to ``requests.post``; ``None``
                    waits indefinitely. Defaults to 60.
    :return: The ``ParsedText`` string of the first ``ParsedResults`` entry.
    :raises RuntimeError: If the decoded response has no ``ParsedResults``.
    """
    # NOTE(review): a concrete API key is hard-coded as the default argument;
    # consider loading it from configuration instead of committing it.
    payload = {'isOverlayRequired': overlay,
               'apikey': api_key,
               'language': language,
               'detectOrientation': 'true',
               'scale': 'true',
               'OCREngine': 2,
               }
    with open(filename, 'rb') as f:
        # Without a timeout an unresponsive server would block this call forever.
        r = requests.post('https://api.ocr.space/parse/image',
                          files={filename: f},
                          data=payload,
                          timeout=timeout,
                          )
    # Decode and parse the body once, then reuse the parsed object.
    result = json.loads(r.content.decode())
    print(result)
    parsed_results = result.get('ParsedResults') if isinstance(result, dict) else None
    if not parsed_results:
        # Surface whatever the server sent back instead of failing with an
        # uninformative KeyError/IndexError on the lookup below.
        detail = result.get('ErrorMessage') if isinstance(result, dict) else result
        raise RuntimeError(
            "OCR.space returned no parsed results for {!r}: {!r}".format(filename, detail)
        )
    txt = parsed_results[0]['ParsedText']
    print(txt)
    return txt
# 远程文件
def ocr_space_url(url, overlay=False, api_key='helloworld', language='chs', timeout=60):
    """Ask the OCR.space API to process a remote image and return the raw response.

    :param url: Image url.
    :param overlay: Value sent as ``isOverlayRequired``.
                    Defaults to False.
    :param api_key: OCR.space API key.
                    Defaults to 'helloworld'.
    :param language: Language code to be used in OCR.
                    List of available language codes can be found on https://ocr.space/OCRAPI
                    Defaults to 'chs'.
    :param timeout: Timeout in seconds handed to ``requests.post``; ``None``
                    waits indefinitely. Defaults to 60.
    :return: The response body decoded to a string (not parsed).
    """
    payload = {'url': url,
               'isOverlayRequired': overlay,
               'apikey': api_key,
               'language': language,
               }
    # Without a timeout an unresponsive server would block this call forever.
    r = requests.post('https://api.ocr.space/parse/image',
                      data=payload,
                      timeout=timeout,
                      )
    return r.content.decode()
def ocr_request(organization):
    """Run OCR on every file of one input directory and write the text to a Markdown file.

    The input directory is ``元丰新制/<organization>/切分后<organization>`` and the
    output file is ``元丰新制/<organization>/<organization>文本.md``, both relative
    to the current working directory. For each input file a ``## /page{<stem>}``
    heading is written, followed by the text returned by ``ocr_space_file``.

    :param organization: Name used to build the input directory and output file paths.
    :return: Path of the Markdown file that was written.
    """
    print("==== ocr识别开始 ====")
    path = "元丰新制/" + organization + "/切分后" + organization
    target_file_name = "元丰新制/" + organization + "/" + organization + "文本.md"
    # os.listdir() returns entries in arbitrary order; sort them so the
    # generated document is reproducible from run to run.
    files = sorted(os.listdir(path))
    # The context manager closes the output file even if an OCR call raises.
    with open(target_file_name, "w", encoding="utf-8") as f:
        for file in files:
            image_path = path + '/' + file
            if not os.path.isfile(image_path):
                # Sub-directories cannot be uploaded; skip them.
                continue
            # splitext() keeps the whole name when it has no extension, whereas
            # slicing at rfind(".") would cut off the last character.
            f.write("## /page{" + os.path.splitext(file)[0] + "}")
            print(file)
            f.write("\n")
            text = ocr_space_file(filename=image_path, language='chs')
            f.write(text)
            f.write("\n")
    print("==== ocr识别结束 ====")
    return target_file_name
if __name__ == "__main__":
    # Script entry point: run OCR for a single hard-coded directory name.
    ocr_request("宰执官类")