forked from Yukaii/CrawlerMaster
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cycu_course_crawler.rb
158 lines (141 loc) · 4.92 KB
/
cycu_course_crawler.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
##
# 中原大學課程爬蟲
# 查詢:https://itouch.cycu.edu.tw/active_system/CourseQuerySystem/
#
module CourseCrawler::Crawlers
  # Crawler for the Chung Yuan Christian University (CYCU) course listing.
  #
  # The listing is downloaded with the `curl` command line tool from the
  # CourseQuerySystem endpoint. The response is a single string in which
  # records are separated by '@@' and the fields of a record by '|'.
  # Every record is converted into a flat course Hash (see #build_course).
  class CycuCourseCrawler < CourseCrawler::Base
    include ::CourseCrawler::DSL

    PERIODS = CoursePeriod.find('CYCU').code_map

    # [time field index, location field index] for each of the three
    # timetable slots of a record.
    # NOTE(review): the pairing of fields 19/21 with slots 18/20 is inferred
    # from the field layout (time, room, time, room, ...) -- confirm against
    # the live feed.
    TIME_SLOT_FIELDS = [[16, 17], [18, 19], [20, 21]].freeze

    # Number of day_N / period_N / location_N columns emitted per course.
    MAX_SLOTS = 9

    # Upper bound of concurrently running callback threads when the
    # MAX_THREADS environment variable is absent or not a positive integer.
    DEFAULT_MAX_THREADS = 20

    # @param year [Integer, nil] year to crawl; falls back to current_year
    # @param term [Integer, nil] term to crawl; falls back to current_term
    # @param update_progress [#call, nil] stored in @update_progress_proc;
    #   this class never invokes it
    # @param after_each [#call, nil] invoked as `call(course: hash)` once for
    #   every crawled course
    # @param params [Object, nil] accepted for interface compatibility, unused
    def initialize(year: current_year, term: current_term, update_progress: nil, after_each: nil, params: nil)
      @year = year || current_year
      @term = term || current_term
      @update_progress_proc = update_progress
      @after_each_proc = after_each
    end

    # Downloads and parses the course listing of @year / @term.
    #
    # @param detail [Boolean] accepted for interface compatibility, unused
    # @return [Array<Hash>] the de-duplicated course hashes
    def courses(detail: false)
      @courses = []
      @threads = []

      puts "get url ..."
      fetch_course_rows.each do |row|
        datas = row.split('|')
        # A blank record carries no fields at all; nothing to build from it.
        next if datas.empty?

        # Interpolation (instead of String#+) tolerates a missing name field.
        puts "data crawled : #{datas[10]}"
        @courses << build_course(datas)
      end
      @courses.uniq!

      run_after_each_callbacks if @after_each_proc

      puts "Project finished !!!"
      @courses
    end

    # def batch_download_books
    # codes = @courses.map {|c| c["code"]}
    # codes.each do |c|
    # puts "load #{c}"
    # system("phantomjs spider.js #{c}")
    # end
    # end
    # def map_book_data
    # @courses.each do |c|
    # filename = "book_datas/#{c[:code]}"
    # if File.exist?(filename)
    # textbook = Oj.load(File.read(filename))
    # c[:textbook] = textbook
    # end
    # end
    # end

    private

    # Fetches the raw listing and returns its records, without the leading
    # chunk that precedes the first '@@' separator.
    #
    # @return [Array<String>] one '|'-separated string per record; empty when
    #   the response is empty
    def fetch_course_rows
      # The endpoint is queried with the year minus 1911
      # (presumably the ROC calendar year -- confirm).
      url = "https://itouch.cycu.edu.tw/active_system/CourseQuerySystem/GetCourses.jsp?yearTerm=#{@year - 1911}#{@term}"
      # r = RestClient.get(url)
      # r = HTTPClient.new.get_content(url).force_encoding('utf-8')
      # The argv form of IO.popen bypasses the shell, so '?', '=' and the
      # interpolated values in the URL are never glob-expanded or interpreted.
      response = IO.popen(['curl', url], &:read)
      # Array#drop returns [] for an empty response, where rows[1..-1] was nil.
      response.strip.split('@@').drop(1)
    end

    # Converts the fields of one record into a course Hash.
    #
    # @param datas [Array<String>] fields of a single record
    # @return [Hash] course attributes, including day_1..day_9,
    #   period_1..period_9 and location_1..location_9
    def build_course(datas)
      general_code = datas[6]
      department_code = general_code && general_code[0..1]
      # Built per record, so a record without a course code gets a nil url
      # instead of inheriting the url left over from an earlier record.
      syllabus_url = general_code &&
                     "http://cmap.cycu.edu.tw:8080/Syllabus/CoursePreview.html?yearTerm=#{@year - 1911}#{@term}&opCode=#{general_code}"
      required = datas[11] && datas[11].include?('必')
      lecturer_code = datas[15] && CGI.escape(datas[15]).tr('%', '')
      course_days, course_periods, course_locations = flatten_timetable(datas)

      course = {
        # cros_inst: datas[1], # cross-division
        # cros_dep: datas[2], # cross-department
        # datas[4] # suspended or not
        # pho_code: datas[5], # voice code
        year: @year,
        term: @term,
        code: "#{@year}-#{@term}-#{general_code}-#{lecturer_code}",
        general_code: general_code, # course code
        # category: datas[7], # course category
        department: datas[8], # responsible unit?
        department_code: department_code,
        # clas: datas[9], # class the course is offered to
        name: datas[10], # course name
        required: required, # required / elective
        # year: datas[12], # full or half year
        # datas[13] # ?
        credits: datas[14].to_i, # credits
        lecturer: datas[15] # lecturer
        # notes: datas[22], # notes
        # department: datas[23], # responsible unit?
        # people: datas[24], # enrollment
      }
      # Keys are appended in the same order as the original literal:
      # all days, then all periods, then all locations, then the url.
      1.upto(MAX_SLOTS) { |n| course[:"day_#{n}"] = course_days[n - 1] }
      1.upto(MAX_SLOTS) { |n| course[:"period_#{n}"] = course_periods[n - 1] }
      1.upto(MAX_SLOTS) { |n| course[:"location_#{n}"] = course_locations[n - 1] }
      course[:url] = syllabus_url
      course
    end

    # Expands the time slots of a record ("<day>-<periods>", one character
    # per period) into three parallel arrays with one entry per period.
    #
    # @param datas [Array<String>] fields of a single record
    # @return [Array(Array, Array, Array)] days, periods and locations
    def flatten_timetable(datas)
      course_days = []
      course_periods = []
      course_locations = []
      first_location = datas[17]

      TIME_SLOT_FIELDS.each do |time_index, location_index|
        time = datas[time_index]
        matched = time && time.match(/(?<d>.)\-(?<p>.+)/)
        next unless matched

        # Use the room listed next to this time slot; when it is missing or
        # blank keep the first slot's room, which is what every slot used
        # to receive.
        location = datas[location_index]
        location = first_location if location.nil? || location.empty?

        matched[:p].each_char do |period|
          course_days << matched[:d].to_i
          course_periods << PERIODS[period]
          course_locations << location
        end
      end

      [course_days, course_periods, course_locations]
    end

    # Calls @after_each_proc once per course, each call in its own thread,
    # keeping fewer than the configured maximum alive at any time, and
    # blocks until all of them have finished.
    def run_after_each_callbacks
      configured = ENV['MAX_THREADS'].to_i
      # A zero / non-numeric MAX_THREADS would make the throttle below wait
      # forever, so anything that is not positive falls back to the default.
      max_threads = configured > 0 ? configured : DEFAULT_MAX_THREADS

      @threads = []
      @courses.each do |course|
        loop do
          @threads.delete_if { |t| !t.alive? } # remove dead (ended) threads
          break if @threads.count < max_threads
          sleep(1)
        end
        @threads << Thread.new(course) { |c| @after_each_proc.call(course: c) }
      end
      # ThreadsWait is kept (rather than Thread#join) because it does not
      # re-raise an exception raised inside a callback thread.
      ThreadsWait.all_waits(*@threads)
    end
  end
end