-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathValidateJobs.py
158 lines (135 loc) · 5.2 KB
/
ValidateJobs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env python3
import sys
import yaml
def getFileGenerator(file):
    """Yield the low-level PyYAML parsing events for the YAML file at *file*.

    Args:
        file: Path of the YAML file to parse.

    Yields:
        The events produced by ``yaml.parse`` for the file's contents,
        in stream order.
    """
    # Read the whole file inside a ``with`` block so the handle is closed
    # deterministically; a bare ``open(file).read()`` leaves the handle open
    # until the garbage collector gets to it.
    with open(file) as fileStream:
        contents = fileStream.read()
    yield from yaml.parse(contents)
def getFile(file):
    """Load the YAML file at *file* and return the parsed data.

    Args:
        file: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents.
    """
    with open(file, 'r') as handle:
        document = yaml.safe_load(handle)
    return document
def checkCompaniesFileForDuplicates(companiesGenerator):
    """Report duplicate company keys, names and urls in companies.yml.

    Walks the PyYAML event stream of the companies file, prints one message
    per duplicate (preceded once by a header line) and returns how many
    duplicates were found.

    Args:
        companiesGenerator: Iterator of PyYAML parsing events for the
            companies file, positioned at the very start of the stream.

    Returns:
        int: Total number of duplicate keys, names and urls encountered.

    Raises:
        StopIteration: If the stream has fewer than four events.
    """
    # Skip the three events that precede the first company key.
    # NOTE(review): presumably stream start, document start and the
    # top-level mapping start -- i.e. this assumes the file is a single
    # top-level mapping of company entries; confirm against the data file.
    for _ in range(3):
        next(companiesGenerator)

    # The fourth event is taken as the first company's key. Sets give O(1)
    # membership tests instead of rescanning a list for every entry.
    seen = {
        'key': {next(companiesGenerator).value},
        'name': set(),
        'url': set(),
    }
    errorCount = 0

    for company in companiesGenerator:
        if isinstance(company, yaml.events.ScalarEvent) and company.value in ('name', 'url'):
            # A ``name``/``url`` scalar is followed by the event holding
            # its value.
            kind = company.value
            event = next(companiesGenerator)
        elif isinstance(company, yaml.events.MappingEndEvent):
            # After a mapping closes, a following scalar is treated as the
            # next company's key; any other event is consumed and ignored.
            kind = 'key'
            event = next(companiesGenerator)
            if not isinstance(event, yaml.events.ScalarEvent):
                continue
        else:
            continue

        if event.value not in seen[kind]:
            seen[kind].add(event.value)
            continue

        errorCount += 1
        if errorCount == 1:
            # Header is printed once, just before the first duplicate.
            print('There are duplicates found in the companies.yml file:')
        # PyYAML's ``start_mark.line`` is zero-based, so add 1 to report the
        # line number an editor would show; the jobs.yml checks in this
        # script apply the same offset.
        print(f'\tDuplicate company {kind} found on line {event.start_mark.line + 1}: {event.value}\n')

    return errorCount
def validateAgainstCompaniesList(companies, jobs, jobsGenerator, addNewline):
    """Check that every job's company exists in the companies data.

    Args:
        companies: Container of known companies; each job's ``company``
            value is tested against it with ``in``.
        jobs: Iterable of job mappings, each with a ``company`` entry.
        jobsGenerator: Iterator of PyYAML parsing events for jobs.yml, used
            only to find the line numbers of the offending values.
        addNewline: When true, print a blank separator before the report.

    Returns:
        int: Number of jobs whose company is not in ``companies``.
    """
    unknown = []
    for job in jobs:
        if job['company'] not in companies:
            unknown.append(job['company'])

    # Nothing to report: leave the event stream untouched.
    if len(unknown) == 0:
        return 0

    if addNewline:
        print('\n')
    print('Cannot find the following companies in the companies.yml file:')

    # Scan events up to the end-of-stream marker, reporting every scalar
    # whose text matches one of the unknown companies.
    while True:
        event = next(jobsGenerator)
        if type(event) is yaml.events.StreamEndEvent:
            break
        if type(event) is yaml.events.ScalarEvent and event.value in unknown:
            print(f'\tLine {event.start_mark.line+1} of jobs.yml: {event.value}\n')

    return len(unknown)
def checkForDuplicateCompanies(jobsGenerator, addNewline):
    """Report companies that appear more than once in jobs.yml.

    Args:
        jobsGenerator: Iterator of PyYAML parsing events for jobs.yml.
        addNewline: When true, print a blank separator before the report
            header (only printed if a duplicate is found).

    Returns:
        int: Number of duplicate ``company`` values encountered.
    """
    # A set keeps the membership test O(1) instead of rescanning a list for
    # every job.
    seenCompanies = set()
    errorCount = 0

    for job in jobsGenerator:
        if not (isinstance(job, yaml.events.ScalarEvent) and job.value == 'company'):
            continue

        # The event right after a ``company`` scalar holds the company value.
        nextValue = next(jobsGenerator)
        if nextValue.value not in seenCompanies:
            seenCompanies.add(nextValue.value)
            continue

        errorCount += 1
        if errorCount == 1:
            # Separator and header are printed once, before the first
            # duplicate.
            if addNewline:
                print('\n')
            print('There are duplicate companies in the jobs.yml file:')
        print(f'\tDuplicate company found on line {nextValue.start_mark.line+1}: {nextValue.value}\n')

    return errorCount
def checkForDuplicateLinks(jobsGenerator, ignoredUrls, addNewline):
    """Report ``link``/``indeed`` urls that appear more than once in jobs.yml.

    Args:
        jobsGenerator: Iterator of PyYAML parsing events for jobs.yml.
        ignoredUrls: Container of urls that are exempt from the duplicate
            check. ``None`` is treated as "nothing ignored".
        addNewline: When true, print a blank separator before the report
            header (only printed if a duplicate is found).

    Returns:
        int: Number of duplicate urls encountered.
    """
    # An empty ``ignored_urls:`` entry in YAML loads as None; fall back to an
    # empty container so the membership test below cannot raise TypeError.
    ignored = ignoredUrls if ignoredUrls is not None else ()
    seenUrls = set()  # set: O(1) membership instead of a list rescan
    company = ''      # most recent company seen, used in the report
    errorCount = 0

    for job in jobsGenerator:
        if not isinstance(job, yaml.events.ScalarEvent):
            continue

        if job.value == 'company':
            # The event after a ``company`` scalar holds the company value.
            company = next(jobsGenerator).value
        elif job.value in ('link', 'indeed'):
            nextValue = next(jobsGenerator)
            url = nextValue.value
            if url in ignored:
                continue
            if url not in seenUrls:
                seenUrls.add(url)
                continue

            errorCount += 1
            if errorCount == 1:
                # Separator and header are printed once, before the first
                # duplicate.
                if addNewline:
                    print('\n')
                print('There are duplicate urls in the jobs.yml file:')
            print(f'\tDuplicate posting found for: {company}\n\t\tLine {nextValue.start_mark.line+1}: {url}\n')

    return errorCount
if __name__ == '__main__':
    # Root directory containing ``_data/``: first command-line argument if
    # given (and non-empty), otherwise the current directory. ``endswith``
    # avoids the IndexError that indexing ``[-1]`` raises on an empty string.
    path = (sys.argv[1] if len(sys.argv) > 1 else '') or './'
    if not path.endswith('/'):
        path += '/'

    jobsFile = f'{path}_data/jobs.yml'
    companiesFile = f'{path}_data/companies.yml'
    lintingConfigFile = f'{path}_data/lint_config.yml'

    # Tolerate an empty config file or a missing/empty ``duplication`` /
    # ``ignored_urls`` entry: in those cases no url is ignored.
    lintingConfig = getFile(lintingConfigFile) or {}
    ignoredUrls = (lintingConfig.get('duplication') or {}).get('ignored_urls') or []

    # Each check gets a fresh event generator because generators are
    # consumed as they are read. ``addNewline`` becomes true once any
    # earlier check has printed output, so later reports are separated.
    resultA = checkCompaniesFileForDuplicates(getFileGenerator(companiesFile))
    addNewline = resultA > 0

    resultB = validateAgainstCompaniesList(getFile(companiesFile), getFile(jobsFile), getFileGenerator(jobsFile), addNewline)
    addNewline = addNewline or resultB > 0

    resultC = checkForDuplicateCompanies(getFileGenerator(jobsFile), addNewline)
    addNewline = addNewline or resultC > 0

    resultD = checkForDuplicateLinks(getFileGenerator(jobsFile), ignoredUrls, addNewline)

    results = resultA + resultB + resultC + resultD
    if results > 0:
        print(f'\nThere were {results} errors found.')
        # ``sys.exit`` rather than the ``exit`` helper, which is injected by
        # the ``site`` module and is not guaranteed to exist.
        sys.exit(1)
    print('No errors found.')
    sys.exit(0)