version 4.0

Historical commit version 4.0 (16.10.2013).
nzbget · Oct 16, 2013 · a3ee92c · a3ee92c
1 parent 86603c5
commit a3ee92c
Show file tree

Hide file tree

Showing 12 changed files with 172 additions and 46 deletions.
diff --git a/ChangeLog.txt b/ChangeLog.txt
@@ -1,10 +1,20 @@
+videosort-4.0:
+  - improved detection of obfuscated file and directory names;
+  - added support for DNZB-Headers "X-DNZB-ProperName", "X-DNZB-EpisodeName",
+    "X-DNZB-MovieYear";
+  - removed support for DNZB-Header "X-DNZB-UseNZBName";
+  - new option "DNZBHeaders" to disable the use of DNZB-Headers if necessary;
+  - new format specifiers "%imdb" and "%cpimdb" (thanks Chris Hamilton for the patch);
+  - invalid characters are now removed from the generated file name;
+  - updated guessit-library to the newest release - this fixes several issues.
+
 videosort-3.0:
- - added for seasoned TV shows: if year in the file name goes directly after
-   show name, it will be added to show name. This may be necessary for
-   media players like XBMC, Boxee or Plex (or anyone using TheTVDB) to
-   properly index TV show. New option option "SeriesYear";
- - added detection of obfuscated file names; if such file name is detected
-   a nzb-name is used instead.
+  - added for seasoned TV shows: if year in the file name goes directly after
+    show name, it will be added to show name. This may be necessary for
+    media players like XBMC, Boxee or Plex (or anyone using TheTVDB) to
+    properly index TV show. New option "SeriesYear";
+  - added detection of obfuscated file names; if such file name is detected
+    a nzb-name is used instead.
 
 videosort-2.0:
   - new options "TvCategories", "OtherTvDir" and "OtherTvFormat" for TV shows, whose file names look like movies (neither series nor dated shows);

diff --git a/VideoSort.py b/VideoSort.py
@@ -36,7 +36,7 @@
 # Author: Andrey Prygunkov ([email protected]).
 # Web-site: http://nzbget.sourceforge.net/VideoSort.
 # License: GPLv3 (http://www.gnu.org/licenses/gpl.html).
-# PP-Script Version: 3.0.
+# PP-Script Version: 4.0.
 #
 # NOTE: This script requires Python 2.x to be installed on your system.
 
@@ -106,6 +106,8 @@
 # %y			  - year;
 # %decade         - two-digits decade (90, 00, 10);
 # %0decade        - four-digits decade (1990, 2000, 2010).
+# %imdb           - IMDb ID;
+# %cpimdb         - IMDb ID (formatted for CouchPotato);
 #
 # Common specifiers (for movies, series and dated tv shows):
 # %dn             - original directory name (nzb-name);
@@ -191,6 +193,13 @@
 # This option has effect on "case-adjusted"-specifiers.
 #UpperWords=III,II,IV
 
+# Use information from Direct-NZB headers (yes, no).
+#
+# NZB-sites may provide extended information about videos,
+# which is usually more reliable than the information extracted
+# from file names.
+#DNZBHeaders=yes
+
 # Overwrite files at destination (yes, no).
 #
 # If not active the files are still moved into destination but
@@ -224,7 +233,6 @@
 sys.path.append(dirname(__file__) + '/lib')
 
 import os
-import string
 import traceback
 import re
 import shutil
@@ -288,17 +296,18 @@
 series_year=os.environ.get('NZBPO_SERIESYEAR', 'yes') == 'yes'
 
 tv_categories=os.environ['NZBPO_TVCATEGORIES'].lower().split(',')
-category=os.environ.get('NZBPP_CATEGORY', '');
+category=os.environ.get('NZBPP_CATEGORY', '')
 force_tv=category.lower() in tv_categories
 
-force_nzbname=os.environ.get('NZBPR__DNZB_USENZBNAME', '').lower() == 'yes'
+dnzb_headers=os.environ.get('NZBPO_DNZBHEADERS', 'yes') == 'yes'
+dnzb_proper_name=os.environ.get('NZBPR__DNZB_PROPERNAME', '')
+dnzb_episode_name=os.environ.get('NZBPR__DNZB_EPISODENAME', '')
+dnzb_movie_year=os.environ.get('NZBPR__DNZB_MOVIEYEAR', '')
+dnzb_more_info=os.environ.get('NZBPR__DNZB_MOREINFO', '')
 
 if preview:
 	print('[WARNING] *** PREVIEW MODE ON - NO CHANGES TO FILE SYSTEM ***')
 
-if verbose and force_nzbname:
-	print('[INFO] Forcing use of nzb-name (X-DNZB-UseNZBName)')
-
 if verbose and force_tv:
 	print('[INFO] Forcing TV sorting (category: %s)' % category)
 
@@ -425,7 +434,7 @@ def cleanup_download_dir():
 	'  ': ' ',
 	'//': '/',
 	' - - ': ' - ',
-	'__': '_'
+	'--': '-'
 }
 
 def path_subst(path, mapping):
@@ -458,9 +467,9 @@ def get_titles(name, titleing=False):
 	a lot of little hacks to make it better and for more control
 	'''
 
-	title = name.replace('.', ' ').replace('_', ' ')
-	title = title.strip().strip('(').strip('_').strip('-').strip().strip('_')
-
+	#make valid filename
+	title = re.sub('[\"\:\?\*\\\/\<\>\|]', ' ', name)
+ 
 	if titleing:
 		title = titler(title) # title the show name so it is in a consistant letter case
 
@@ -702,6 +711,10 @@ def add_movies_mapping(guess, mapping):
 	mapping.append(('%decade', decade))
 	mapping.append(('%0decade', decade_two))
 
+	# imdb
+	mapping.append(('%imdb', guess.get('imdb', '')))
+	mapping.append(('%cpimdb', guess.get('cpimdb', '')))
+
 def add_dated_mapping(guess, mapping):
 
 	# title
@@ -753,22 +766,110 @@ def add_dated_mapping(guess, mapping):
 	mapping.append(('%d', day))
 	mapping.append(('%0d', day.rjust(2, '0')))
 
-def guess_info(filename):
-	""" Parses the filename using guessit-library """
+def os_path_split(path):
+    parts = []
+    while True:
+        newpath, tail = os.path.split(path)
+        if newpath == path:
+            if path: parts.append(path)
+            break
+        parts.append(tail)
+        path = newpath
+    parts.reverse()
+    return parts
+
+def deobfuscate_path(filename):
+	start = os.path.dirname(download_dir)
+	new_name = filename[len(start)+1:]
+	if verbose:
+		print('stripped filename: %s' % new_name)
 
-	use_nzbname = force_nzbname
+	parts = os_path_split(new_name)
+	if verbose:
+		print(parts)
 
-	if not use_nzbname:
-		fn = os.path.splitext(os.path.basename(filename))[0]
+	part_removed = 0
+	for x in range(0, len(parts)-1):
+		fn = parts[x]
 		if fn.find('.')==-1 and fn.find('_')==-1 and fn.find(' ')==-1:
-			print("Detected obfuscated filename %s, using NZB-Name instead" % os.path.basename(filename))
-			use_nzbname = True
-
-	if use_nzbname:
-		guessfilename = os.path.join(os.path.dirname(filename), os.path.basename(download_dir)) + os.path.splitext(filename)[1]
+			print('Detected obfuscated directory name %s, removing from guess path' % fn)
+			parts[x] = None
+			part_removed += 1
+
+	fn = os.path.splitext(parts[len(parts)-1])[0]
+	if fn.find('.')==-1 and fn.find('_')==-1 and fn.find(' ')==-1:
+		print('Detected obfuscated filename %s, removing from guess path' % os.path.basename(filename))
+		parts[len(parts)-1] = '-' + os.path.splitext(filename)[1]
+		part_removed += 1
+
+	if part_removed < len(parts):
+		new_name = ''
+		for x in range(0, len(parts)):
+			if parts[x] != None:
+				new_name = os.path.join(new_name, parts[x])
 	else:
-		guessfilename = filename
+		print("All file path parts are obfuscated, using obfuscated NZB-Name")
+		new_name = os.path.basename(download_dir) + os.path.splitext(filename)[1]
+
+	return new_name
+
+def remove_year(title):
+	""" Removes year from series name (if exist) """
+	m = re.compile('..*(\((19|20)\d\d\))').search(title)
+	if not m:
+		m = re.compile('..*((19|20)\d\d)').search(title)
+	if m:
+		if verbose:
+			print('Removing year from series name')
+		title = title.replace(m.group(1), '').strip()
+	return title
 
+def apply_dnzb_headers(guess):
+	""" Applies DNZB headers (if exist) """
+
+	dnzb_used = False
+	if dnzb_proper_name != '':
+		dnzb_used = True
+		if verbose:
+			print('Using DNZB-ProperName')
+		if guess['vtype'] == 'series':
+			proper_name = dnzb_proper_name
+			if not series_year:
+				proper_name = remove_year(proper_name)
+			guess['series'] = proper_name
+		else:
+			guess['title'] = dnzb_proper_name
+
+	if dnzb_episode_name != '' and guess['vtype'] == 'series':
+		dnzb_used = True
+		if verbose:
+			print('Using DNZB-EpisodeName')
+		guess['title'] = dnzb_episode_name
+
+	if dnzb_movie_year != '':
+		dnzb_used = True
+		if verbose:
+			print('Using DNZB-MovieYear')
+		guess['year'] = dnzb_movie_year
+
+	if dnzb_more_info != '':
+		dnzb_used = True
+		if verbose:
+			print('Using DNZB-MoreInfo')
+		if guess['type'] == 'movie':
+			regex = re.compile(r'^http://www.imdb.com/title/(tt[0-9]+)/$', re.IGNORECASE)
+			matches = regex.match(dnzb_more_info)
+			if matches:
+				guess['imdb'] = matches.group(1)
+				guess['cpimdb'] = 'cp(' + guess['imdb'] + ')'
+
+	if verbose and dnzb_used:
+		print(guess.nice_string())
+
+def guess_info(filename):
+	""" Parses the filename using guessit-library """
+
+	guessfilename = deobfuscate_path(filename)
 	if verbose:
 		print('Guessing: %s' % guessfilename)
 
@@ -818,6 +919,9 @@ def guess_info(filename):
 	elif guess['type'] == 'episode':
 		guess['vtype'] = 'series'
 
+	if dnzb_headers:
+		apply_dnzb_headers(guess)
+
 	if verbose:
 		print('Type: %s' % guess['vtype'])
 
@@ -829,7 +933,7 @@ def construct_path(filename):
 	if verbose:
 		print("filename: %s" % filename)
 
-	guess = guess_info(filename);
+	guess = guess_info(filename)
 	type = guess.get('vtype')
 	mapping = []
 	add_common_mapping(filename, guess, mapping)

diff --git a/lib/guessit/ISO-3166-1_utf8.txt b/lib/guessit/ISO-3166-1_utf8.txt
diff --git a/lib/guessit/ISO-639-2_utf-8.txt b/lib/guessit/ISO-639-2_utf-8.txt
diff --git a/lib/guessit/__init__.py b/lib/guessit/__init__.py
@@ -20,7 +20,7 @@
 
 from __future__ import unicode_literals
 
-__version__ = '0.7-dev'
+__version__ = '0.7.dev0'
 __all__ = ['Guess', 'Language',
            'guess_file_info', 'guess_video_info',
            'guess_movie_info', 'guess_episode_info']

diff --git a/lib/guessit/fileutils.py b/lib/guessit/fileutils.py
@@ -44,13 +44,14 @@ def split_path(path):
     result = []
     while True:
         head, tail = os.path.split(path)
+        headlen = len(head)
 
         # on Unix systems, the root folder is '/'
-        if head == '/' and tail == '':
+        if head and head == '/'*headlen and tail == '':
             return ['/'] + result
 
         # on Windows, the root folder is a drive letter (eg: 'C:\') or for shares \\
-        if ((len(head) == 3 and head[1:] == ':\\') or (len(head) == 2 and head == '\\\\')) and tail == '':
+        if ((headlen == 3 and head[1:] == ':\\') or (headlen == 2 and head == '\\\\')) and tail == '':
             return [head] + result
 
         if head == '' and tail == '':
@@ -61,6 +62,7 @@ def split_path(path):
             path = head
             continue
 
+        # otherwise, add the last path fragment and keep splitting
         result = [tail] + result
         path = head
 

diff --git a/lib/guessit/guess.py b/lib/guessit/guess.py
@@ -181,7 +181,7 @@ def choose_string(g1, g2):
     elif v1l in v2l:
         return (v1, combined_prob)
 
-    # in case of conflict, return the one with highest priority
+    # in case of conflict, return the one with highest confidence
     else:
         if c1 > c2:
             return (v1, c1 - c2)

diff --git a/lib/guessit/matcher.py b/lib/guessit/matcher.py
@@ -21,7 +21,7 @@
 from __future__ import unicode_literals
 from guessit import PY3, u, base_text_type
 from guessit.matchtree import MatchTree
-from guessit.textutils import normalize_unicode
+from guessit.textutils import normalize_unicode, clean_string
 import logging
 
 log = logging.getLogger(__name__)
@@ -62,9 +62,9 @@ def __init__(self, filename, filetype='autodetect', opts=None):
         (for more info, see guess.matchtree.to_string)
 
 
-         Second, it tries to merge all this information into a single object
-         containing all the found properties, and does some (basic) conflict
-         resolution when they arise.
+        Second, it tries to merge all this information into a single object
+        containing all the found properties, and does some (basic) conflict
+        resolution when they arise.
         """
 
         valid_filetypes = ('autodetect', 'subtitle', 'video',
@@ -84,6 +84,11 @@ def __init__(self, filename, filetype='autodetect', opts=None):
             opts = opts.split()
 
         self.match_tree = MatchTree(filename)
+
+        # sanity check: make sure we don't process a (mostly) empty string
+        if clean_string(filename) == '':
+            return
+
         mtree = self.match_tree
         mtree.guess.set('type', filetype, confidence=1.0)
 

diff --git a/lib/guessit/patterns.py b/lib/guessit/patterns.py
@@ -49,7 +49,7 @@
                   #(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<bonusNumber>(?:-?[xX-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),
 
                   # ... 2x13 ...
-                  (r'[^0-9](?P<season>[0-9]{1,2})[^0-9]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)),
+                  (r'[^0-9](?P<season>[0-9]{1,2})[^0-9 .-]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)),
 
                   # ... s02 ...
                   #(sep + r's(?P<season>[0-9]{1,2})' + sep, 0.6, (1, -1)),

diff --git a/lib/guessit/slogging.py b/lib/guessit/slogging.py
@@ -31,14 +31,15 @@
 RESET_FONT = "\x1B[0m"
 
 
-def setupLogging(colored=True, with_time=False, with_thread=False, filename=None):
+def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):
     """Set up a nice colored logger as the main application logger."""
 
     class SimpleFormatter(logging.Formatter):
         def __init__(self, with_time, with_thread):
             self.fmt = (('%(asctime)s ' if with_time else '') +
                         '%(levelname)-8s ' +
-                        '[%(name)s:%(funcName)s]' +
+                        '[%(name)s:%(funcName)s' +
+                        (':%(lineno)s' if with_lineno else '') + ']' +
                         ('[%(threadName)s]' if with_thread else '') +
                         ' -- %(message)s')
             logging.Formatter.__init__(self, self.fmt)
@@ -47,7 +48,8 @@ class ColoredFormatter(logging.Formatter):
         def __init__(self, with_time, with_thread):
             self.fmt = (('%(asctime)s ' if with_time else '') +
                         '-CC-%(levelname)-8s ' +
-                        BLUE_FONT + '[%(name)s:%(funcName)s]' +
+                        BLUE_FONT + '[%(name)s:%(funcName)s' +
+                        (':%(lineno)s' if with_lineno else '') + ']' +
                         RESET_FONT + ('[%(threadName)s]' if with_thread else '') +
                         ' -- %(message)s')
 

diff --git a/lib/guessit/transfo/guess_episodes_rexps.py b/lib/guessit/transfo/guess_episodes_rexps.py
@@ -44,11 +44,6 @@ def guess_episodes_rexps(string):
             span = (match.start() + span_adjust[0],
                     match.end() + span_adjust[1])
 
-            # episodes which have a season > 30 are most likely errors
-            # (Simpsons is at 24!)
-            if int(guess.get('season', 0)) > 30:
-                continue
-
             # decide whether we have only a single episode number or an
             # episode list
             if guess.get('episodeNumber'):