forked from Yukaii/CrawlerMaster
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ncnu_course_crawler.rb
118 lines (97 loc) · 4.43 KB
/
ncnu_course_crawler.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# 國立暨南國際大學
# 課程查詢網址:http://www.doc.ncnu.edu.tw/ncnu/index.php?option=com_content&view=article&id=324&Itemid=382&lang=tw
module CourseCrawler::Crawlers
  # Crawler for the course catalogue of National Chi Nan University (NCNU).
  #
  # All data is downloaded with the `curl` command-line tool from the CSV web
  # services below `@query_url`. Every response is a quoted CSV document whose
  # records are separated by CRLF.
  class NcnuCourseCrawler < CourseCrawler::Base
    PERIODS = CoursePeriod.find('NCNU').code_map

    # Department ids sent as `deptid` when collecting required-course names.
    REQUIRED_DEPT_IDS = %w[
      00 01 02 03 04 05 06 07 08 09 11 12 13 14 18 19 21 22 23 24 28 29 35 38
      39 45 46 C2 Z6 Zc
    ].freeze

    # Values sent as the `class` query parameter of the required-course services.
    CLASS_CODES = %w[B G P].freeze

    # Web services that list required courses; both are queried for every
    # (class, department) pair.
    REQUIRED_ENDPOINTS = %w[csvDepartRequireCourses csvDepartGroupCourses].freeze

    # Number of day_N / period_N / location_N slots stored per course.
    SLOT_COUNT = 9

    # @param year [Integer, nil] Gregorian year; 1911 is subtracted from it to
    #   build the `year` query parameter, so it must be set before #courses runs
    # @param term [Integer, nil] term number (first term = 1, second term = 2)
    # @param update_progress [#call, nil] stored as given; not used in this class
    # @param after_each [#call, nil] invoked as `call(course: hash)` after each
    #   course is built
    def initialize(year: nil, term: nil, update_progress: nil, after_each: nil)
      @year = year
      @term = term
      @update_progress_proc = update_progress
      @after_each_proc = after_each
      @query_url = 'http://www.ncnu.edu.tw/ncnuweb/'
    end

    # Downloads and parses every open course for the configured year and term.
    #
    # @return [Array<Hash>] one hash per course that has schedule data; the
    #   same array is also kept in `@courses`
    def courses
      @courses = []
      puts "get url ..."

      index_page = curl_get("#{@query_url}services/course.aspx")
      # Each match is a one-element array holding the captured id. The first
      # match is skipped, as before; `drop` (unlike `[1..-1]`) yields an empty
      # array instead of nil when the page contains no match at all.
      depts = index_page.scan(/\"(?<dep>\w\w?\d\d\d?)\"/).drop(1)

      # Whether a course is required is not part of the open-course listing;
      # it has to be looked up through separate web services.
      required = required_course_names

      depts.each do |dept|
        listing = curl_get(
          "#{@query_url}webservice/csvDepartOpenCourses.aspx" \
          "?year=#{roc_year}#{@term}&uid=#{dept[0]}"
        )
        csv_records(listing, 1).each do |line|
          # Column layout (taken from the CSV header row):
          #   0 term, 1 department, 2 syllabus id (general_code), 3 course name,
          #   4 lecturer, 5 division, 6 grade, 7 credits, 8 time, 9 location
          data = line.split("\",\"")
          next if data[8].nil? # do not save a course without period data

          course = build_course(data, required)
          @after_each_proc.call(course: course) if @after_each_proc
          @courses << course
        end
      end

      puts "Project fininshed !!!"
      @courses
    end

    private

    # Value of the `year` query parameter: the Gregorian year minus 1911.
    def roc_year
      @year - 1911
    end

    # Runs `curl -s <url> --compressed` and returns whatever it prints to
    # stdout. The argument-vector form of IO.popen bypasses the shell, so no
    # character of the URL can be interpreted as shell syntax.
    def curl_get(url)
      IO.popen(['curl', '-s', url, '--compressed'], &:read)
    end

    # Splits a quoted CSV response into its raw records.
    #
    # The leading quote and the last three characters (presumably the closing
    # quote plus CRLF) are cut off so that splitting on `"\r\n"` leaves clean
    # record strings. Returns an empty array instead of raising when the
    # response is empty or shorter than the rows to skip.
    #
    # @param body [String, nil] raw response text
    # @param skip_rows [Integer] number of leading records to discard
    # @return [Array<String>]
    def csv_records(body, skip_rows)
      payload = body.to_s[1..-4]
      return [] if payload.nil?

      payload.split("\"\r\n\"").drop(skip_rows)
    end

    # Collects the fourth column of every required-course record, for every
    # class code and department id.
    #
    # @return [Hash{String => true}] lookup table keyed by that column's value,
    #   which #build_course compares against the course name
    def required_course_names
      names = {}
      CLASS_CODES.each do |class_code|
        REQUIRED_DEPT_IDS.each do |dept_id|
          puts "data crawled : #{class_code}->#{dept_id}"
          REQUIRED_ENDPOINTS.each do |endpoint|
            body = curl_get(
              "#{@query_url}webservice/#{endpoint}.aspx" \
              "?year=#{roc_year}&deptid=#{dept_id}&class=#{class_code}"
            )
            # The first two records are skipped, as in the original code.
            csv_records(body, 2).each do |record|
              name = record.split("\",\"")[3]
              names[name] = true unless name.nil?
            end
          end
        end
      end
      names
    end

    # Builds the course hash for one open-course record.
    #
    # @param data [Array<String>] fields of the record (see column layout in #courses)
    # @param required [Hash{String => true}] names of required courses
    # @return [Hash]
    def build_course(data, required)
      days = []
      periods = []
      locations = []
      # The time column is a run of "<day digit><period letters>" groups; every
      # letter becomes its own (day, period, location) slot.
      data[8].scan(/(\d)([a-z]+)/).each do |day, codes|
        codes.each_char do |code|
          days << day.to_i
          periods << PERIODS[code]
          locations << data[9]
        end
      end

      course = {
        year: @year,              # Gregorian year
        term: @term,              # term (first term = 1, second term = 2)
        name: data[3],            # course name
        lecturer: data[4],        # lecturer (column 4; column 5 is the division)
        credits: data[7].to_i,    # credits (column 7; column 8 is the time string)
        code: "#{@year}-#{@term}-#{data[2]}",
        general_code: data[2],    # course selection code
        url: "#{@query_url}webservice/csvDepartOpenCourseSyllabus.aspx" \
             "?year=#{roc_year}#{@term}&courseid=#{data[2]}", # syllabus link
        required: required.key?(data[3]), # required or elective, matched by name
        department: data[1]       # offering department
      }

      # day_1..day_9, then period_1..period_9, then location_1..location_9;
      # slots beyond the parsed schedule stay nil.
      { day: days, period: periods, location: locations }.each do |prefix, values|
        (1..SLOT_COUNT).each do |slot|
          course[:"#{prefix}_#{slot}"] = values[slot - 1]
        end
      end
      course
    end
  end
end