-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.coffee
186 lines (141 loc) · 5.88 KB
/
app.coffee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
require('dotenv').config()
cheerio = require 'cheerio'
fs = require 'fs'
{ write_file_with_template } = require './templater'
{ schematize } = require './schema'
{ path } = process.env
acts_and_links = require './acts-and-links'
intro = fs.readFileSync 'templates/intro.html'
# Replace every element matched by `class_name` with a `<tag>` element that
# keeps the same inner HTML. The new element's id is the element's text with
# all question marks removed.
switch_class_for_tag = (class_name, tag) ->
  $(class_name).each (index, element) ->
    paragraph = $(element)
    heading_id = paragraph.text().replace /(\?)/g, ''
    paragraph.replaceWith "<#{tag} id='#{heading_id}'>#{paragraph.html()}</#{tag}>"
# Unwrap elements: for each selector, in order, every matching element is
# replaced by its own inner HTML. Returns nothing.
remove_wrapping_elements = (array_of_selectors) ->
  for selector in array_of_selectors
    $(selector).each (index, element) ->
      wrapper = $(element)
      wrapper.replaceWith wrapper.html()
  return
# Remove every "<digits>.<optional space><tab>" run from `text`.
remove_chapter_number = (text) ->
  chapter_prefix = /(\d+\. ?\t)/g
  text.replace chapter_prefix, ''
# Return the digits of a leading "<digits>." prefix, or undefined when `text`
# does not start with one.
get_chapter_number = (text) ->
  found = text.match /^(\d+)\./
  found?[1]
# Turn a section title into a file-name fragment: spaces and tabs become
# hyphens; colons, commas and curly quotes are dropped.
section_to_filename = (section) ->
  hyphenated = section.replace(/( )/g, '-').replace(/(\t)/g, '-')
  hyphenated.replace /([:,’“”])/g, ''
# Format one legislation citation for display.
#
# Section references ("s 14", "ss 13 – 18", "ss 3, 5", "ss 3-5") are wrapped in
# <nobr> so they do not break across lines, and a single trailing full stop is
# removed.
#
# The section pattern is anchored on a word boundary so that a word which merely
# ends in "s" followed by another word (e.g. "Rights Act") is not mistaken for a
# section reference and split by markup.
tidy_citation = (citation) ->
  citation
    .replace /(\bss? \w+(( – |-|, )\w+)?)/g, '<nobr>$1</nobr>'
    .replace /(\.)$/, '' # trailing full stop
# Read the exported HTML and apply text-level fixes before it is parsed.
# The replacements, in order: swap two glyph patterns for ✔ and ✘, drop two
# kinds of empty paragraph, and wrap every run of three or more capital letters
# in a "smallcaps" span.
# NOTE(review): the patterns of the first two replacements presumably hold the
# mis-encoded check/cross glyphs and can look empty in some viewers — verify
# before editing those lines.
# NOTE(review): the smallcaps rule runs over raw markup, so capital runs inside
# tags or attribute values would be wrapped as well — confirm the export has none.
pas_text = fs.readFileSync "problems at school.html"
pas_text = pas_text.toString()
.replace //g, '✔' # fix bad checks & crosses
.replace //g, '✘'
.replace /<p class="body"> <\/p>/gi, '' # get rid of some empty paras
.replace /<p class="Heading-4-first"> <\/p>/gi, ''
.replace /([A-Z]{2}[A-Z]+)/g, '<span class="smallcaps">$1</span>'
# Parse the pre-processed HTML and strip export scaffolding from the DOM.
$ = cheerio.load pas_text

# Unwrap container elements, keeping their contents. Some selectors are listed
# more than once; presumably each pass peels one nesting level — TODO confirm.
remove_wrapping_elements ['div', 'div', 'div', '.xref-box', '.xref', '.xref', '.url', '.chap-num', '.chapter-for-running-head', 'span:not([class!=""])']

# Delete elements outright: bullets (.char-style-override-3), numbers
# (.char-style-override-5), .No-Paragraph-Style, image frames (.frame-2), images.
[
  '.char-style-override-3'
  '.char-style-override-5'
  '.No-Paragraph-Style'
  '.frame-2'
  'img'
].forEach (unwanted_selector) ->
  $(unwanted_selector).remove()

# Turn styled paragraphs into real heading elements (same order as before).
[
  ['p.Heading-1', 'h1']
  ['p.Heading-2', 'h2']
  ['p.Heading-3', 'h3']
  ['p.Heading-3-first', 'h3']
  ['p.Heading-4', 'h4']
  ['p.Heading-4-first', 'h4']
  ['p.Heading-5', 'h5']
].forEach ([paragraph_class, heading_tag]) ->
  switch_class_for_tag paragraph_class, heading_tag

# I don't know why I need to do this
# (Serialise the cleaned document to disk and parse it again from the file.)
# writeFileSync is synchronous and reports failure by throwing; it takes no
# callback, so none is passed.
fs.writeFileSync "export/problems at school.html", $.html(), 'utf8'
$ = cheerio.load fs.readFileSync "export/problems at school.html"
# clean up links & anchors
#
# For every <a> that carries an id, rewrite each in-document link to it
# (href="#id") so it points at the page file that will contain the anchor:
#   * anchor inside an h1/h2  -> that heading's own page;
#   * otherwise               -> the page of the nearest preceding h1 or h2
#     sibling of the anchor's parent. h1 is included so that anchors in a
#     chapter introduction (before the chapter's first h2) resolve to the
#     chapter page rather than to the previous chapter's last section.
# Chapter numbers are stripped from h1 text because chapter file names are
# built from the un-numbered title.
# The fragment is appended verbatim: only the section name goes through
# section_to_filename, so the fragment always equals the anchor's real id.
# Finally ", page N" references are removed from the text of every <a>.
$('a').each (i, e) ->
  anchor = $(e)
  id = anchor.attr 'id'
  if id
    holder = anchor.parent()
    if holder.is('h1') or holder.is('h2')
      section = remove_chapter_number holder.text()
    else
      nearest = holder.prevAll('h2,h1').eq(0)
      if nearest.is('h1')
        section = remove_chapter_number nearest.text()
      else
        section = nearest.text()
    $("a[href='##{id}']").attr 'href', "#{section_to_filename section}.html##{id}"
  no_page_refs = anchor.text().replace /, page \d+/g, ''
  anchor.text no_page_refs
# link up legislation
# Each p.legislation paragraph holds citations separated by ";". Every citation
# is looked up with acts_and_links and tidied for display; when a link is found
# the citation becomes an <a>, otherwise it stays plain text. The paragraph's
# HTML is then replaced by the resulting lines.
$('p.legislation').each (i, e) ->
  paragraph = $(e)
  rendered = paragraph.text().split(/; ?/).map (raw_citation) ->
    url = acts_and_links raw_citation
    shown = tidy_citation raw_citation
    if url
      "<a target=\"legislation\" href=\"#{url}\">#{shown}</a><br/>\n"
    else
      "#{shown}<br/>"
  links = rendered.join ''
  if links then paragraph.html links
# make phone numbers clickable
# Numbers starting with "0800" or an area code like "(04)", followed by 3-8
# digits/spaces, are wrapped in a telephone-annotated span containing a tel:
# link. The replacement markup is balanced: the previous version emitted a
# stray </strong> with no opening tag inside the link.
$('p').each (i, e) ->
  paragraph = $(e)
  linked = paragraph.html().replace /((0800|\(0\d\))[\d ]{3,8})/g, "<span itemprop=\"telephone\" content=\"$1\"><a href=\"tel:$1\">$1</a></span>"
  paragraph.html linked
# build main & chapter contents pages
# `pages` collects one entry per output file, in reading order; `main_contents`
# accumulates the nested table of contents for index.html.
pages = []
main_contents = "<ul class=\"contents\">\n"

$('h1').each (chapter_index, chapter_el) ->
  chapter = $(chapter_el)
  h1_heading = remove_chapter_number chapter.text()
  h1_file = section_to_filename h1_heading + '.html'
  h1_image = section_to_filename h1_heading + '.jpg'
  h1_breadcrumb = "<p class=\"breadcrumb\"><a href=\"../index.html\">Student Rights</a>\n > <a href=\"index.html\">Problems at School</a>\n > #{h1_heading}</p>"

  # The chapter introduction (everything up to the next heading) is rendered
  # here, before schematize is called on the chapter body.
  chapter_intro = h1_breadcrumb + "<h1>#{h1_heading}</h1>\n" + chapter.nextUntil('h2,h1')
  chapter_body = chapter.nextUntil('h1')

  # Only numbered chapters get a thumbnail in the main contents list.
  if get_chapter_number chapter.text()
    main_contents += "<li><a href=\"#{h1_file}\"><img src=\"../img/chapter/#{h1_image}\" />#{h1_heading}</a>\n"
  else
    main_contents += "<li><a href=\"#{h1_file}\">#{h1_heading}</a>\n"

  schematize chapter_body, $

  # Remember where this chapter starts so its own page can be inserted ahead
  # of the section pages pushed below.
  chapter_start = pages.length
  chapter_sections = []

  chapter_body.filter('h2').each (section_index, section_el) ->
    section_node = $(section_el)
    h2_heading = section_node.text().replace(/(\t)/g, ' ')
    h2_file = section_to_filename h2_heading + '.html'
    h2_breadcrumb = "<p class=\"breadcrumb\"><a href=\"../index.html\">Student Rights</a>\n > <a href=\"index.html\">Problems at School</a> > <a href=\"#{h1_file}\">#{h1_heading}</a> > #{h2_heading}</p>\n"
    h2_content = h2_breadcrumb + "<h2>#{h2_heading}</h2>\n" + section_node.nextUntil('h2,h1')
    pages.push { file: h2_file, title: h2_heading, content: h2_content }
    chapter_sections.push { section: h2_heading, file: h2_file }

  section_items = chapter_sections.map (entry) ->
    "<li><a href=\"#{entry.file}\">#{entry.section}</a></li>\n"
  section_contents = "<ul class=\"contents\">\n" + section_items.join('') + "</ul>\n"

  main_contents += section_contents + "</li>\n"
  pages.splice chapter_start, 0,
    file: h1_file
    title: h1_heading
    content: chapter_intro + section_contents

main_contents += "</ul>"

# The index page goes first so it is the "previous" page of the first chapter.
pages.unshift
  file: 'index.html'
  title: 'Problems at School'
  content: intro + main_contents

# Write every page with links to its neighbours (null at either end).
pages.forEach (page, index) ->
  prev = if index > 0 then pages[index - 1].file else null
  next = if index < pages.length - 1 then pages[index + 1].file else null
  write_file_with_template page.file, page.title, page.content, prev, next
# Build headings.json: one { label, link } entry per heading, deepest levels
# first (h5 ... h1).
#   * h1/h2 have their own page, so the link is "<page>.html#" with no fragment.
#   * h3-h5 link to the page of the nearest preceding h1/h2 sibling plus their
#     own id. When that nearest heading is an h1 its chapter number is stripped,
#     because chapter file names are built from the un-numbered title.
headings = []
['h5', 'h4', 'h3', 'h2', 'h1'].forEach (selector) ->
  has_own_page = selector is 'h1' or selector is 'h2'
  $(selector).each (i, e) ->
    heading = $(e)
    label = remove_chapter_number heading.text()
    id = ''
    if has_own_page
      section = label
    else
      nearest = heading.prevAll('h2,h1').eq(0)
      if nearest.is('h1')
        section = remove_chapter_number nearest.text()
      else
        section = nearest.text()
      id = heading.attr('id')
    headings.push
      label: label
      link: "#{section_to_filename section}.html##{id}"

# writeFileSync is synchronous and reports failure by throwing; it takes no
# callback, so none is passed.
# NOTE(review): `path` comes from process.env; if it is unset this writes to
# "undefined/headings.json" — confirm the variable is always provided.
fs.writeFileSync "#{path}/headings.json", JSON.stringify(headings, null, 4), 'utf8'