-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
259 lines (228 loc) · 9.71 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
import os
import sys
import zipfile
from util.log import logger
from shutil import copyfile
from parsers.gms import GmsDocParser
from parsers.dex import DexFileParser
from parsers.java import JavaDocParser
from parsers.single_java import SingleJavaDocParser
from parsers.facebook import FacebookDocParser
from parsers.javalike import JavaLikeDocParser
from parsers.pushwoosh import PushwooshDocParser
from parsers.appbrain import AppbrainDocParser
from parsers.silverjava import SilverJavaDocParser
from util.ResultChecker import process_results
from util.traverseFolder import get_first_layer_folders, get_first_layer_files, get_all_files
def parse_facebook_folder(target_folder):
    """Parse every Facebook doc folder found directly under ``target_folder``.

    Each first-layer sub-folder is handed to a ``FacebookDocParser``, which
    is run and then asked to print its results. CSV export is not performed
    by this function.

    Args:
        target_folder: Folder whose first-layer sub-folders are parsed.
    """
    for doc_folder in get_first_layer_folders(target_folder):
        facebook_parser = FacebookDocParser(doc_folder)
        facebook_parser.run()
        facebook_parser.print_results()
def parse_gms_folder():
    """Run the GMS doc parser, then print its results and export them to CSV."""
    logger.info('GMS')
    gms_parser = GmsDocParser()
    gms_parser.run()
    # Report on the console first, then persist to CSV.
    gms_parser.print_results()
    gms_parser.print_to_csv()
def parse_javalike_doc(target_folder):
    """Parse every Java-like doc folder found directly under ``target_folder``.

    For each first-layer sub-folder the folder name is printed, a
    ``JavaLikeDocParser`` is run on it, and the results are written to CSV.

    Args:
        target_folder: Folder whose first-layer sub-folders are parsed.
    """
    for javalike_doc in get_first_layer_folders(target_folder):
        # basename() honours the platform's path separator; the previous
        # hard-coded "\\" split only produced the folder name on Windows.
        doc_name = os.path.basename(javalike_doc)
        print(doc_name)
        parser = JavaLikeDocParser(javalike_doc)
        parser.run()
        parser.print_to_csv()
def parse_current_javadoc_folder(target_folder):
    """Parse every JavaDoc folder found directly under ``target_folder``.

    Each first-layer sub-folder is processed by a ``SingleJavaDocParser``;
    its results are printed and also written to CSV.

    Args:
        target_folder: Folder whose first-layer sub-folders are parsed.
    """
    print("Java Doc Folder=" + target_folder)
    for javadoc in get_first_layer_folders(target_folder):
        # basename() honours the platform's path separator; the previous
        # hard-coded "\\" split only produced the folder name on Windows.
        print("Processing JavaDoc=" + os.path.basename(javadoc))
        parser = SingleJavaDocParser(javadoc)
        parser.run()
        parser.print_results()
        parser.print_to_csv()
def parse_historical_javadoc_folder_(sdk_name, target_folder):
    """Parse every JavaDoc folder under ``target_folder`` for one named SDK.

    Each first-layer sub-folder is processed by a ``JavaDocParser`` created
    with ``sdk_name``; its results are printed and also written to CSV.

    Args:
        sdk_name: SDK name passed to every ``JavaDocParser``.
        target_folder: Folder whose first-layer sub-folders are parsed.
    """
    print("SDK Name=" + sdk_name + " Java Doc Folder=" + target_folder)
    for javadoc in get_first_layer_folders(target_folder):
        # basename() honours the platform's path separator; the previous
        # hard-coded "\\" split only produced the folder name on Windows.
        print("Processing JavaDoc=" + os.path.basename(javadoc))
        parser = JavaDocParser(sdk_name, javadoc)
        parser.run()
        parser.print_results()
        parser.print_to_csv()
def parse_appbrain_doc(target_folder):
    """Run the AppBrain doc parser on ``target_folder`` and export to CSV.

    Args:
        target_folder: Folder handed to ``AppbrainDocParser``.
    """
    appbrain_parser = AppbrainDocParser(target_folder)
    print("AppBrain")
    appbrain_parser.run()
    # Results are only exported to CSV, not printed.
    appbrain_parser.print_to_csv()
def parse_jar_folder(target_folder):
    """Parse every ``.jar`` file located directly inside ``target_folder``.

    The SDK name given to ``DexFileParser`` is the jar's file name without
    its ``.jar`` extension. A failure while parsing one jar is reported on
    stdout and does not stop the remaining jars from being processed.

    Args:
        target_folder: Folder whose first-layer files are scanned for jars.
    """
    jar_suffix = ".jar"
    for jar_file in get_first_layer_files(target_folder, False):
        if not jar_file.endswith(jar_suffix):
            continue
        jar_name = os.path.basename(jar_file)
        print(jar_name)
        # Strip the extension. The previous ``jar_name[-4]`` selected only the
        # single character "." rather than the name without ".jar".
        sdk_name = jar_name[:-len(jar_suffix)]
        try:
            parser = DexFileParser(sdk_name, jar_file)
            parser.run()
            parser.print_results()
            parser.print_to_csv()
        except Exception as e:
            # Best effort: report the failing jar and continue with the next.
            print(jar_name + " meets exception!")
            print(e)
# def parse_jar_folder(sdk_name, target_folder):
# # logger.info("Jar Folder=" + target_folder)
# jar_files = get_first_layer_files(target_folder, False)
# for jar_file in jar_files:
# if not jar_file.endswith(".jar"):
# continue
# jar_name = jar_file.split(os.sep)[-1]
# print(jar_file.split("\\")[-1])
# # logger.info("Processing File=" + jar_file)
# try:
# parser = DexFileParser(sdk_name, jar_file)
# parser.run()
# parser.print_results()
# # parser.print_to_csv()
# except Exception as e:
# print(jar_name + " meets exception!")
# print(e)
def parse_pushwoosh():
    """Run the Pushwoosh doc parser and print its results."""
    logger.info('Pushwoosh')
    pushwoosh_parser = PushwooshDocParser()
    pushwoosh_parser.run()
    pushwoosh_parser.print_results()
def parse_dex_folder(target_folder):
    """Run ``DexFileParser`` on every file directly inside ``target_folder``.

    Results are printed only. Any exception raised for a file is printed and
    processing moves on to the next file.

    Args:
        target_folder: Folder whose first-layer files are parsed.
    """
    logger.info("Dex Folder=" + target_folder)
    for dex_path in get_first_layer_files(target_folder, False):
        try:
            # NOTE(review): other call sites in this file build
            # DexFileParser(sdk_name, path); confirm that the one-argument
            # form used here is accepted by the parser.
            dex_parser = DexFileParser(dex_path)
            dex_parser.run()
            dex_parser.print_results()
        except Exception as err:
            print(err)
def parse_silverjava_doc(target_folder):
    """Parse every Silver-Java doc folder directly under ``target_folder``.

    Each first-layer sub-folder is processed by a ``SilverJavaDocParser``;
    its results are printed and also written to CSV.

    Args:
        target_folder: Folder whose first-layer sub-folders are parsed.
    """
    for silver_doc in get_first_layer_folders(target_folder):
        # Only the last path component is shown in the progress message.
        folder_name = silver_doc.rsplit(os.sep, 1)[-1]
        print("Processing Silver Java Doc=" + folder_name)
        silver_parser = SilverJavaDocParser(silver_doc)
        silver_parser.run()
        silver_parser.print_results()
        silver_parser.print_to_csv()
def process_jar_package_folder(target_folder):
    """Parse every ``.jar`` directly inside one SDK package folder.

    The last path component of ``target_folder`` is used as the SDK name for
    all jars in the folder. A failure while parsing one jar is reported on
    stdout and does not stop the remaining jars from being processed.

    The jars are handled here rather than by delegating to
    ``parse_jar_folder``: that function accepts only the folder argument, so
    the previous two-argument call raised ``TypeError`` and the folder-derived
    SDK name could not be passed through.

    Args:
        target_folder: SDK package folder whose first-layer jars are parsed.
    """
    sdk_name = os.path.basename(target_folder)
    for jar_file in get_first_layer_files(target_folder, False):
        if not jar_file.endswith(".jar"):
            continue
        jar_name = os.path.basename(jar_file)
        print(jar_name)
        try:
            parser = DexFileParser(sdk_name, jar_file)
            parser.run()
            parser.print_results()
            parser.print_to_csv()
        except Exception as e:
            # Best effort: report the failing jar and continue with the next.
            print(jar_name + " meets exception!")
            print(e)
def process_javadoc_package_folder(target_folder):
    """Unpack the javadoc jars in ``target_folder`` and parse their HTML pages.

    For every ``.jar`` file directly inside ``target_folder``:

    1. the archive is extracted next to itself, into a folder named after
       the jar (file name without ``.jar``);
    2. the extracted HTML files are collected, skipping files whose names
       start with one of the blacklisted prefixes below;
    3. the remaining files are copied, flattened by file name, into
       ``<target_folder>/new_sdks/<sdk_name>_new/All``.

    Finally ``parse_historical_javadoc_folder_`` is run on the ``new_sdks``
    folder, using the last path component of ``target_folder`` as SDK name.

    Args:
        target_folder: Folder containing the javadoc jar files of one SDK.
    """
    # File-name prefixes that are filtered out before copying.
    black_list = ("allclasses", "constant-values", "deprecated-list",
                  "help-doc", "index", "overview-", "package-")
    new_folder_path = os.path.join(target_folder, "new_sdks")

    for jar_path in get_first_layer_files(target_folder, html=False):
        # The SDK name is obtained by slicing off the last four characters,
        # so only paths that really end in ".jar" are valid here.
        if not jar_path.endswith(".jar"):
            continue
        sdk_name = os.path.basename(jar_path)[:-4]
        extract_path = os.path.join(os.path.dirname(jar_path), sdk_name)
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(jar_path, "r") as zip_file:
            zip_file.extractall(extract_path)

        # str.startswith accepts a tuple, so one call tests every prefix.
        html_list = [
            html_file
            for html_file in get_all_files(extract_path, html=True)
            if not os.path.basename(html_file).startswith(black_list)
        ]

        next_folder = os.path.join(new_folder_path, sdk_name + "_new", "All")
        # Creates new_sdks/, <sdk_name>_new/ and All/ as needed.
        os.makedirs(next_folder, exist_ok=True)
        for html_file in html_list:
            # Files are flattened into one folder; a later file with the same
            # name overwrites an earlier one.
            copyfile(html_file,
                     os.path.join(next_folder, os.path.basename(html_file)))

    # NOTE(review): indentation was lost in the reviewed copy of this file;
    # this call is assumed to run once, after all jars have been unpacked.
    package_name = os.path.basename(os.path.normpath(target_folder))
    parse_historical_javadoc_folder_(package_name, new_folder_path)
def main():
    """Command-line entry point: ``main.py <parser_type> [target_folder]``.

    Only the ``all_test`` parser type is currently active; it runs the jar,
    JavaDoc, Facebook, Java-like, AppBrain and Silver-Java parsers on fixed
    local dataset folders. The per-type dispatch that would consume
    ``target_folder`` is kept below as disabled code.

    Exits with a usage message when no parser type is supplied, instead of
    failing with ``IndexError``.
    """
    # javadoc_folders = get_first_layer_folders("C:\\Users\\Rainy\\Lab_Project\\dataset_science\\API_Docs\\History\\javadoc_test")
    # for javadoc_folder in javadoc_folders:
    #     print("Processing Folder=" + javadoc_folder)
    #     process_javadoc_package_folder(javadoc_folder)

    # Parse historical-version docs
    # jar_folders = get_first_layer_folders("C:\\Users\\Rainy\\Lab_Project\\dataset_science\\API_Docs\\History\\jar_test")
    # for jar_folder in jar_folders:
    #     print("Processing Jar Folder=" + jar_folder)
    #     process_jar_package_folder(jar_folder)

    # if not os.path.exists("./api_results"):
    #     os.mkdir("./api_results")

    if len(sys.argv) < 2:
        sys.exit("Usage: " + sys.argv[0] + " <parser_type> [target_folder]")
    parser_type = sys.argv[1]
    # Optional: the active 'all_test' branch does not use it; it is only
    # needed by the disabled per-type dispatch below.
    target_folder = sys.argv[2] if len(sys.argv) > 2 else None

    # Config.target_folder = target_folder
    # if parser_type.lower() == 'jar_folder':
    #     parse_jar_folder(target_folder)
    # if parser_type.lower() == 'javadoc_folder':
    #     parse_javadoc_folder(target_folder)
    # if parser_type.lower() == 'facebooks':
    #     parse_facebook_folder(target_folder)
    # if parser_type.lower() == 'gms':
    #     parse_gms_folder()
    # if parser_type.lower() == 'javalike_folder':
    #     parse_javalike_doc(target_folder)
    # if parser_type.lower() == 'pushwoosh':
    #     parse_pushwoosh()
    # if parser_type.lower() == "dexs":
    #     parse_dex_folder(target_folder)
    # if parser_type.lower() == 'appbrain':
    #     parse_appbrain_doc(target_folder)
    # if parser_type.lower() == 'silverjava':
    #     parse_silverjava_doc()

    if parser_type.lower() == 'all_test':
        # Fixed dataset locations on the original author's machine.
        api_docs_root = "C:\\Users\\Rainy\\Lab_Project\\dataset_science\\API_Docs"
        android_docs_root = api_docs_root + "\\Android_Docs"
        jar_folder = api_docs_root + "\\Android_Jars"
        javadoc_folder = android_docs_root + "\\Java"
        facebook_doc_folder = android_docs_root + "\\Facebook"
        javalike_folder = android_docs_root + "\\Javalike"
        appbrain_folder = android_docs_root + "\\AppBrain"
        silverjava_folder = android_docs_root + "\\Silverjava"

        parse_jar_folder(jar_folder)
        parse_current_javadoc_folder(javadoc_folder)
        parse_facebook_folder(facebook_doc_folder)
        parse_javalike_doc(javalike_folder)
        parse_appbrain_doc(appbrain_folder)
        parse_silverjava_doc(silverjava_folder)
        # parse_pushwoosh()
    else:
        # Previously an unrecognised type was ignored without any feedback.
        print("Unsupported parser type=" + parser_type)
    # process_results()
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()