fixed hyphen issue

johnykvsky · Nov 20, 2020 · 2784e1c · 2784e1c
1 parent f257ff0
commit 2784e1c
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 7 deletions.
diff --git a/__main__.py b/__main__.py
@@ -10,7 +10,7 @@
 __license__ = 'GNU Affero GPL v3'
 __copyright__ = '2014, Robert Błaut [email protected]'
 __appname__ = 'epubQTools'
-numeric_version = (0, 8)
+numeric_version = (0, 9)
 __version__ = '.'.join(map(str, numeric_version))
 __author__ = 'Robert Błaut <[email protected]>'
 

diff --git a/lib/epubqfix.py b/lib/epubqfix.py
@@ -51,7 +51,7 @@
     ))
 MY_LANGUAGE = 'pl'
 MY_LANGUAGE2 = 'pl-PL'
-HYPHEN_MARK = '\\u00AD'
+HYPHEN_MARK = '\u00AD'
 
 HOME = os.path.expanduser("~")
 DTD = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" '
@@ -1520,7 +1520,7 @@ def remove_wm_info(opftree, rootepubdir):
                 alltexts = wmtree.xpath('//xhtml:body//text()',
                                         namespaces=XHTMLNS)
                 alltext = ' '.join(alltexts)
-                alltext = alltext.replace('\\u00AD', '').strip()
+                alltext = alltext.replace('\u00AD', '').strip()
                 if (
                     alltext == 'Plik jest zabezpieczony znakiem wodnym' or
                     'Ten ebook jest chroniony znakiem wodnym' in alltext or
@@ -1566,18 +1566,18 @@ def process_xhtml_file(xhfile, opftree, _resetmargins, skip_hyph, opf_path,
     for key in list(entities.keys()):
         c = c.replace(key, entities[key])
     try:
-        xhtree = etree.fromstring(c.encode('utf-8'), parser=etree.XMLParser(recover=False))
+        xhtree = etree.fromstring(c.encode('utf-8'), parser=etree.XMLParser(recover=False, encoding='utf-8'))
     except etree.XMLSyntaxError as e:
         if ('XML declaration allowed only at the start of the '
                 'document' in str(e).decode(SFENC)):
             xhtree = etree.fromstring(c[c.find('<?xml'):],
-                                      parser=etree.XMLParser(recover=False))
+                                      parser=etree.XMLParser(recover=False, encoding='utf-8'))
         elif re.search('Opening and ending tag mismatch: body line \d+ and '
                        'html', str(e).decode(SFENC)):
             try:
                 xhtree = etree.fromstring(
                     c.replace('</html>', '</body></html>'),
-                    parser=etree.XMLParser(recover=False)
+                    parser=etree.XMLParser(recover=False, encoding='utf-8')
                 )
             except:
                 print('* File skipped: ' + os.path.basename(xhfile) +
@@ -1713,7 +1713,7 @@ def process_epub(_tempdir, _replacefonts, _resetmargins,
         except OSError:
             pass
 
-    parser = etree.XMLParser(remove_blank_text=True)
+    parser = etree.XMLParser(remove_blank_text=True, encoding='utf-8')
     try:
         opftree = etree.parse(opf_file_path_abs, parser)
     except (etree.XMLSyntaxError, IOError) as e: