-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path__init__.py
198 lines (164 loc) · 6.77 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ApeSmit - A simple Python module to create XML sitemaps
# <http://www.florian-diesch.de/software/apesmit/>
# Copyright (C) 2008 Florian Diesch <[email protected]>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import datetime, codecs
#: Values accepted for the ``changefreq`` argument of `Url.__init__()`, which
#: raises ``ValueError`` for anything not in this set. ``None`` is allowed and
#: is stored as ``None``; `Sitemap.write_url()` then writes no
#: ``<changefreq>`` element for that URL.
FREQ = {None, 'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'} #: values for changefreq
try:
    _text_type = unicode  # Python 2: values are stored as ``unicode``
except NameError:
    _text_type = str  # Python 3: ``str`` already is the text type


class Url(object):
    """
    Class to handle a URL in `Sitemap`

    Stores the text form of ``loc``, ``lastmod``, ``changefreq`` and
    ``priority`` for one entry. Optional values that were not given are
    stored as ``None``.
    """

    # (character, entity) pairs applied in this order by `escape()`. The
    # ampersand has to be replaced first: every entity inserted afterwards
    # contains an ampersand that must not be escaped a second time.
    _XML_ENTITIES = (
        ('&', '&amp;'),
        ("'", '&apos;'),
        ('"', '&quot;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
    )

    def __init__(self, loc, lastmod, changefreq, priority, escape=True):
        """
        Constructor

        :Parameters:
          loc : string
            Location (URL). See http://www.sitemaps.org/protocol.php#locdef
          lastmod : ``datetime.date`` or ``string``
            Date of last modification.
            See http://www.sitemaps.org/protocol.php#lastmoddef
            The string ``'today'`` is replaced by today's date in ISO format.
          changefreq : One of the values in `FREQ`
            Expected frequency for changes.
            See http://www.sitemaps.org/protocol.php#changefreqdef
          priority : ``float`` or ``string``
            Priority of this URL relative to other URLs on your site.
            See http://www.sitemaps.org/protocol.php#prioritydef
          escape
            True if escaping for XML special characters should be done.
            Pass False when ``loc`` is already escaped, otherwise existing
            entities would be escaped twice.
            See http://www.sitemaps.org/protocol.php#escaping

        :raise ValueError: if ``changefreq`` is not one of the values in `FREQ`
        """
        self.loc = self.escape(loc) if escape else loc

        if lastmod == 'today':
            lastmod = datetime.date.today().isoformat()
        self.lastmod = self._as_text(lastmod)

        if changefreq not in FREQ:
            raise ValueError("Invalid changefreq value: '%s'" % changefreq)
        self.changefreq = self._as_text(changefreq)

        self.priority = self._as_text(priority)

        # Not read anywhere in this module; kept so that code accessing the
        # attribute on existing instances keeps working.
        self.urls = []

    @staticmethod
    def _as_text(value):
        """Return ``value`` converted to text, or ``None`` if it is ``None``."""
        if value is None:
            return None
        return _text_type(value)

    def escape(self, s):
        """
        Escaping XML special characters

        Replaces ``&``, ``'``, ``"``, ``>`` and ``<`` by the entities
        ``&amp;``, ``&apos;``, ``&quot;``, ``&gt;`` and ``&lt;``.

        :Parameters:
          s
            String to escape

        :return: Escaped string
        """
        for char, entity in self._XML_ENTITIES:
            s = s.replace(char, entity)
        return s
try:
    _string_types = (basestring,)  # Python 2: both ``str`` and ``unicode``
except NameError:
    _string_types = (str,)  # Python 3


def _to_text(value):
    """Return ``value`` as text; only byte strings are decoded (as UTF-8).

    Text that is already decoded is passed through untouched. Calling
    ``.decode()`` on it fails for non-ASCII text on Python 2 and is not
    available at all on Python 3.
    """
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value


def _optional_element(tag, value):
    """Return the line `` <tag>value</tag>``, or ``''`` if ``value`` is None."""
    if value is None:
        return ''
    return ' <%s>%s</%s>\n' % (tag, _to_text(value), tag)


class Sitemap(object):
    """
    Class to manage a sitemap

    Collects `Url` objects in ``self.urls`` and writes them out either as a
    sitemap (``<urlset>``) or as a sitemap index (``<sitemapindex>``).
    """

    def __init__(self, lastmod=None, changefreq=None, priority=None):
        """
        Constructor

        :Parameters:
          lastmod
            Default value for `lastmod`. See `Url.__init__()`.
          changefreq
            Default value for `changefreq`. See `Url.__init__()`.
          priority
            Default value for `priority`. See `Url.__init__()`.
        """
        self.lastmod = lastmod
        self.changefreq = changefreq
        self.priority = priority
        self.urls = []

    def add(self, loc, lastmod=None, changefreq=None, priority=None, escape=True):
        """
        Add a new URL. Parameters are the same as in `Url.__init__()`.

        If ``lastmod``, ``changefreq`` or ``priority`` is ``None`` the default
        value is used (see `__init__()`)
        """
        if lastmod is None:
            lastmod = self.lastmod
        if changefreq is None:
            changefreq = self.changefreq
        if priority is None:
            priority = self.priority
        self.urls.append(Url(loc, lastmod, changefreq, priority, escape))

    def write(self, out, type='url'):
        """
        Write sitemap to ``out``

        :Parameters:
          out
            file name or anything with a ``write()`` method
          type
            ``'url'`` writes a sitemap (see `write_url()`),
            ``'sitemapindex'`` writes a sitemap index (see `write_index()`).
            Any other value writes nothing.

        If ``out`` is a file name it is opened for writing as UTF-8 and closed
        again afterwards. If it cannot be opened, a message is printed and
        nothing is written. An object passed in by the caller is never closed.
        """
        opened_here = isinstance(out, _string_types)
        if opened_here:
            try:
                output = codecs.open(out, 'w', 'utf-8')
            except Exception as e:
                # Best effort by design: report the problem and give up
                # instead of propagating the error to the caller.
                print("Can't open file: %s" % (str(e),))
                return
        else:
            output = out

        try:
            if type == 'url':
                self.write_url(output)
            elif type == 'sitemapindex':
                self.write_index(output)
        finally:
            # Close the file we opened even if a writer raised.
            if opened_here:
                output.close()

    def write_url(self, output):
        """
        Write all URLs as a ``<urlset>`` sitemap.

        ``<lastmod>``, ``<changefreq>`` and ``<priority>`` are only written
        for URLs where the value is not ``None``.

        :Parameters:
          output
            object with a ``write()`` method that accepts text

        :return: ``output``
        """
        output.write("<?xml version='1.0' encoding='UTF-8'?>\n"
                     '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n'
                     ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n'
                     ' http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"\n'
                     ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')
        for url in self.urls:
            output.write(''.join((
                ' <url>\n',
                ' <loc>%s</loc>\n' % _to_text(url.loc),
                _optional_element('lastmod', url.lastmod),
                _optional_element('changefreq', url.changefreq),
                _optional_element('priority', url.priority),
                ' </url>\n',
            )))
        output.write('</urlset>\n')
        return output

    def write_index(self, output):
        """
        Write all URLs as a ``<sitemapindex>``.

        Only ``loc`` and (if not ``None``) ``lastmod`` of each URL are
        written; ``changefreq`` and ``priority`` are ignored.

        :Parameters:
          output
            object with a ``write()`` method that accepts text

        :return: ``output``
        """
        # Same XML declaration as in write_url(), and a newline after the
        # opening tag so the first <sitemap> starts on its own line.
        output.write("<?xml version='1.0' encoding='UTF-8'?>\n"
                     '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')
        for url in self.urls:
            output.write(''.join((
                ' <sitemap>\n',
                ' <loc>%s</loc>\n' % _to_text(url.loc),
                _optional_element('lastmod', url.lastmod),
                ' </sitemap>\n',
            )))
        output.write('</sitemapindex>\n')
        return output