-
Notifications
You must be signed in to change notification settings - Fork 71
Notecase import script
jaap-karssenberg edited this page Oct 25, 2013
·
1 revision
file "notecase2zim.py":
#!/usr/bin/python
# Simple script to convert NoteCase Document to a Zim notebook folder
#
# NoteCase reference:
# http://notecase.sourceforge.net/ (Free version, discontinued)
# http://www.virtual-sky.com/ (Pro version)
#
# Based on BeautifulSoup (you need to install it before running notecase2zim):
# http://www.crummy.com/software/BeautifulSoup/
#
# Adapted to my use of NoteCase and Zim => others may want to adapt it
# For instance:
# Color "red" in NoteCase => I use "italic" in Zim
# Background Color "grey" in NoteCase => Title 3 in Zim
#
# Usage :
# -------
# 1. Save NoteCase document to .ncd format (plain text, no compression)
# 2. This script assumes the name is "notecase.ncd". This can be changed below
# 3. Run: python notecase2zim.py
# 4. Get a Folder named "notecase.zim" with the main file "notebook.zim" inside
#
# v1.1
# Jigho 2011
# Contact: https://launchpad.net/~jigho
#
import os
import shutil
import sys
import re
import datetime
sys.path.append('./BeautifulSoup')
from BeautifulSoup import BeautifulSoup
# Name of the NoteCase document to convert. It is read by main() and the
# name of the output folder is derived from it in the __main__ section.
notecasefile = 'notecase.ncd'
def create_file_zim():
    """Write the ``notebook.zim`` descriptor into the current directory.

    The notebook name and the end-of-line mode are hard-coded in the string
    below; edit it to change them.
    """
    # ``with`` guarantees the handle is closed even if the write fails
    with open('notebook.zim', 'w') as fileZim:
        fileZim.write('[Notebook]\nname=Notes\nversion=0.4\nendofline=dos')
def process_title(titre, date):
    """Create the Zim page file for one note title and return it still open.

    titre -- the <dt> element holding the note title
    date  -- value written after "Creation-Date: " (converted with str())

    The file is created in the current directory. Its name is the title plus
    ".txt", with spaces turned into "_" and "/" and '"' removed. The Zim page
    header and the level-1 heading are written before returning.

    Returns the open file object; closing it is left to the caller.
    """
    # Some titles are plain, but some carry information that Zim does not use
    if titre.span:
        titre2 = str(titre.span.contents[1])
    elif titre.string:
        # Encode right away (as process_content does for text nodes): keeping
        # the title as unicode made str() and the byte-oriented write() below
        # fail on non-ASCII titles.
        # NOTE(review): assumes titre.string is a unicode string, as returned
        # by BeautifulSoup 3 -- confirm.
        titre2 = titre.string.encode('utf-8')
    else:
        titre2 = str(titre.contents[1])

    # Delete white space, / and " in the filename
    filename = titre2 + '.txt'
    for unwanted, replacement in ((' ', '_'), ('/', ''), ('"', '')):
        filename = filename.replace(unwanted, replacement)
    output = unicode(filename, 'utf-8', errors='ignore')

    # Some verbosity, useful on large contents
    # to be aware that the program is still processing...
    print('Creating file:  %s' % output)

    fileOut = open(output, 'w')
    # Standard information at the start of any Zim file
    fileOut.write('Content-Type: text/x-zim-wiki\n')
    fileOut.write('Wiki-Format: zim 0.4\n')
    fileOut.write('Creation-Date: ' + str(date) + '\n')
    fileOut.write('\n====== ' + titre2 + ' ======\n')
    fileOut.write('\n')
    return fileOut
def create_subdir(repertoire):
    """Create the sub-page folder matching a page file and move into it.

    repertoire -- open page file; only its ``name`` attribute is used

    The folder name is the file name without its trailing ".txt". The folder
    is created in the current directory when it does not exist yet, then it
    becomes the current directory.
    """
    suffix = '.txt'
    rep = repertoire.name
    # Strip the extension only: a plain replace() would also remove a ".txt"
    # found in the middle of the name and break the page/folder pairing
    if rep.endswith(suffix):
        rep = rep[:-len(suffix)]
    # The same page may hold several sub-note lists: reuse the folder then
    if not os.path.isdir(rep):
        os.mkdir(rep)
    os.chdir(rep)
def process_format(c, fichier, formatString):
    """Write the content of a basic formatting tag wrapped in a wiki marker.

    c            -- the formatting element (underline, bold, italic, ...)
    fichier      -- open page file the wiki text is written to
    formatString -- wiki marker written before and after the content

    The content itself is rendered by process_content(), so nested tags are
    handled recursively.
    """
    newLine = False
    # Open Wiki format
    fichier.write(formatString)

    # When the formatted content ends with a line break, close the marker
    # first and write the new line afterwards, without formatting.
    children = c.contents
    if len(children) > 1:
        before_last, last = children[-2], children[-1]
        ends_with_br = (before_last.__class__.__name__ == 'Tag'
                        and before_last.name == 'br')
        # Only drop the node following the <br> when it is blank text:
        # removing it unconditionally lost real text in "<b>foo<br>bar</b>".
        trailing_blank = (last.__class__.__name__ != 'Tag'
                          and not last.strip())
        if ends_with_br and trailing_blank:
            before_last.extract()
            last.extract()
            newLine = True

    # Process content (recursively !)
    process_content(c, fichier, formatString)
    # Close Wiki format
    fichier.write(formatString)
    # The line break removed above is written after the closing marker
    if newLine:
        fichier.write('\n')
# Wiki marker for each tag that maps directly onto a Zim inline style
_TAG_MARKUP = {
    'u': '__',  # underline
    'b': '**',  # bold
    'i': '//',  # italic
    's': '~~',  # strike-through
}

# Wiki marker for each known <span> colour (personal mapping, adapt as needed)
_SPAN_STYLE_MARKUP = {
    'color:#ff0000': '//',  # red in NoteCase => italic in Zim
    'color:#0000ff': '**',  # blue in NoteCase => bold in Zim
    'color:#00ff00': '**',  # green in NoteCase => bold in Zim
}

# <span> style rendered as a Zim heading (grey background in NoteCase)
_HEADING_STYLE = 'background-color:#bfbfbf'


def process_content(contenu, fichier, currentFormat):
    """Convert a sequence of parsed nodes to Zim wiki text.

    contenu       -- iterable of nodes (a tag or a list of its children)
    fichier       -- open page file the wiki text is written to
    currentFormat -- wiki marker currently open, '' when there is none

    "currentFormat" is a trick to close the wiki format at the end of each
    line, even when the format spans several lines. It would need to be
    enhanced to support nested formats.

    A <dl> element starts a sub-note: a folder named after ``fichier`` is
    entered, the sub-note is written by process_page(), then the current
    directory moves back up.
    """
    for c in contenu:
        if c.__class__.__name__ != 'Tag':
            ligne = c.string.encode("UTF-8")
            # Delete the new line symbol at the start of the line.
            # This happens when there was a <br> just before,
            # but <br> is already taken into account.
            fichier.write(re.sub("^\n", '', ligne))
            continue

        if c.name == 'dl':
            # <dl> tag stands for a new note, i.e. a new Zim file
            create_subdir(fichier)
            process_page(c)
            os.chdir('..')
        elif c.name == 'br':
            # <br> tag stands for a new line: close the current format,
            # break the line, then reopen the format on the new line
            fichier.write(currentFormat)
            fichier.write('\n')
            fichier.write(currentFormat)
        elif c.name in _TAG_MARKUP:
            process_format(c, fichier, _TAG_MARKUP[c.name])
        elif c.name == 'span':
            # <span> has different purposes according to its style.
            # get() instead of [] so a <span> without style is reported as
            # unknown instead of raising KeyError.
            style = c.get('style')
            if style in _SPAN_STYLE_MARKUP:
                process_format(c, fichier, _SPAN_STYLE_MARKUP[style])
            elif style == _HEADING_STYLE:
                fichier.write('===== ')
                # The heading marker is not symmetrical, so it is not passed
                # down as "currentFormat"; Zim seems to close it by itself
                # at the end of the line.
                process_content(c, fichier, currentFormat)
                # NOTE(review): the format is cleared only after the
                # recursive call, so it affects the siblings that follow --
                # confirm this order is intended.
                currentFormat = ''
            else:
                # Other <span> contents are treated as plain text.
                # You may add more cases according to your needs.
                print("WARNING : unknown SPAN type %s" % (c.attrs,))
                process_content(c, fichier, currentFormat)
        elif c.name == 'p':
            # <p> tag is not taken into account, only its content
            process_content(c, fichier, currentFormat)
        elif c.name == 'a':
            # <a> tag stands for links; an anchor without href gives an
            # empty target instead of raising KeyError
            fichier.write('[[')
            fichier.write(c.get('href', '').encode('utf-8'))
            fichier.write('|')
            process_content(c, fichier, currentFormat)
            fichier.write(']]')
        else:
            # Tag which is not dealt with: add a specific branch above
            # according to your needs
            print('WARNING, unknown tag:  %s' % c.name)
            # c.string is None when the tag has several children, and the
            # name is encoded too so that only byte strings are joined
            texte = (c.string or '').encode("UTF-8")
            fichier.write(
                'TAG ' + c.name.encode("UTF-8") + ' / ' + texte)
def process_page(page):
    """Convert one <dl> list of notes into Zim page files.

    page -- the <dl> element; its children are scanned in document order:
            <dt>     -- title: a new page file is created by process_title()
            <dd>     -- body: written to the latest page by process_content()
            comments -- a "property:date_created" comment sets the creation
                        date used for the page files created after it

    Until such a comment is met, today's date is used as the creation date.
    """
    creation = datetime.date.today()
    fileOut = None
    try:
        for a in page.contents:
            kind = a.__class__.__name__
            if kind == 'Tag':
                if a.name == 'dt':
                    # The previous page is complete once a new title starts
                    if fileOut is not None:
                        fileOut.close()
                    fileOut = process_title(a, creation)
                elif a.name == 'dd':
                    if fileOut is None:
                        # No page file to write to: report it instead of
                        # failing on an unbound variable
                        print('WARNING, <dd> content without <dt> title')
                        continue
                    process_content(a.contents, fileOut, '')
            elif kind == 'Comment':
                m = re.match("<!--property:date_created=(.*)-->$", str(a))
                if m:
                    creation = datetime.date.fromtimestamp(
                        float(m.group(1)))
    finally:
        # Page files used to stay open until the interpreter exited
        if fileOut is not None:
            fileOut.close()
def main(repertoire):
    """Convert the NoteCase document ``notecasefile`` into a Zim notebook.

    repertoire -- existing folder that receives the notebook; it becomes the
                  current directory

    The document is read and parsed before the directory change, so a
    relative ``notecasefile`` is resolved against the starting directory.
    """
    # ``with`` closes the input file as soon as it has been read
    with open(notecasefile, 'r') as source:
        xml = source.read()
    soup = BeautifulSoup(xml, convertEntities=BeautifulSoup.XML_ENTITIES)
    # The top-level <dl> holds the root notes
    level0 = soup.html.body.dl
    os.chdir(repertoire)
    create_file_zim()
    process_page(level0)
if __name__ == '__main__':
    # The output folder is the input name with its extension replaced by
    # ".zim". splitext() is used because the former ".ncd$" pattern had an
    # unescaped dot, and left the name unchanged (colliding with the input
    # file) when it did not end with "ncd".
    zimdir = os.path.splitext(notecasefile)[0] + '.zim'
    # Uncomment to delete the output folder left by a previous run
    #shutil.rmtree(zimdir)
    os.mkdir(zimdir)
    main(zimdir)