Skip to content
This repository has been archived by the owner on Nov 18, 2022. It is now read-only.

Commit

Permalink
version 4.0
Browse files Browse the repository at this point in the history
Historical commit version 4.0 (16.10.2013).
  • Loading branch information
hugbug committed Oct 16, 2013
1 parent 86603c5 commit a3ee92c
Show file tree
Hide file tree
Showing 12 changed files with 172 additions and 46 deletions.
22 changes: 16 additions & 6 deletions ChangeLog.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
videosort-4.0:
- improved detection of obfuscated file and directory names;
- added support for DNZB-Headers "X-DNZB-ProperName", "X-DNZB-EpisodeName",
"X-DNZB-MovieYear";
- removed support for DNZB-Header "X-DNZB-UseNZBName";
- new option "DNZBHeaders" to disable the use of DNZB-Headers if necessary;
- new format specifiers "%imdb" and "%cpimdb" (thanks Chris Hamilton for the patch);
- removing invalid characters from generated file name;
- updated guessit-library to the newest release - this fixes several issues.

videosort-3.0:
- added for seasoned TV shows: if year in the file name goes directly after
show name, it will be added to show name. This may be necessary for
media players like XBMC, Boxee or Plex (or anyone using TheTVDB) to
properly index TV show. New option "SeriesYear";
- added detection of obfuscated file names; if such file name is detected
a nzb-name is used instead.
- added for seasoned TV shows: if year in the file name goes directly after
show name, it will be added to show name. This may be necessary for
media players like XBMC, Boxee or Plex (or anyone using TheTVDB) to
properly index TV show. New option "SeriesYear";
- added detection of obfuscated file names; if such file name is detected
a nzb-name is used instead.

videosort-2.0:
- new options "TvCategories", "OtherTvDir" and "OtherTvFormat" for TV shows, whose file names look like movies (neither series nor dated shows);
Expand Down
150 changes: 127 additions & 23 deletions VideoSort.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
# Author: Andrey Prygunkov ([email protected]).
# Web-site: http://nzbget.sourceforge.net/VideoSort.
# License: GPLv3 (http://www.gnu.org/licenses/gpl.html).
# PP-Script Version: 3.0.
# PP-Script Version: 4.0.
#
# NOTE: This script requires Python 2.x to be installed on your system.

Expand Down Expand Up @@ -106,6 +106,8 @@
# %y - year;
# %decade - two-digits decade (90, 00, 10);
# %0decade - four-digits decade (1990, 2000, 2010).
# %imdb - IMDb ID;
# %cpimdb - IMDb ID (formatted for CouchPotato);
#
# Common specifiers (for movies, series and dated tv shows):
# %dn - original directory name (nzb-name);
Expand Down Expand Up @@ -191,6 +193,13 @@
# This option has effect on "case-adjusted"-specifiers.
#UpperWords=III,II,IV

# Use information from Direct-NZB headers (yes, no).
#
# NZB-sites may provide extended information about videos,
# which is usually more reliable than the information extracted
# from file names.
#DNZBHeaders=yes

# Overwrite files at destination (yes, no).
#
# If not active the files are still moved into destination but
Expand Down Expand Up @@ -224,7 +233,6 @@
sys.path.append(dirname(__file__) + '/lib')

import os
import string
import traceback
import re
import shutil
Expand Down Expand Up @@ -288,17 +296,18 @@
series_year=os.environ.get('NZBPO_SERIESYEAR', 'yes') == 'yes'

tv_categories=os.environ['NZBPO_TVCATEGORIES'].lower().split(',')
category=os.environ.get('NZBPP_CATEGORY', '');
category=os.environ.get('NZBPP_CATEGORY', '')
force_tv=category.lower() in tv_categories

force_nzbname=os.environ.get('NZBPR__DNZB_USENZBNAME', '').lower() == 'yes'
dnzb_headers=os.environ.get('NZBPO_DNZBHEADERS', 'yes') == 'yes'
dnzb_proper_name=os.environ.get('NZBPR__DNZB_PROPERNAME', '')
dnzb_episode_name=os.environ.get('NZBPR__DNZB_EPISODENAME', '')
dnzb_movie_year=os.environ.get('NZBPR__DNZB_MOVIEYEAR', '')
dnzb_more_info=os.environ.get('NZBPR__DNZB_MOREINFO', '')

if preview:
print('[WARNING] *** PREVIEW MODE ON - NO CHANGES TO FILE SYSTEM ***')

if verbose and force_nzbname:
print('[INFO] Forcing use of nzb-name (X-DNZB-UseNZBName)')

if verbose and force_tv:
print('[INFO] Forcing TV sorting (category: %s)' % category)

Expand Down Expand Up @@ -425,7 +434,7 @@ def cleanup_download_dir():
' ': ' ',
'//': '/',
' - - ': ' - ',
'__': '_'
'--': '-'
}

def path_subst(path, mapping):
Expand Down Expand Up @@ -458,9 +467,9 @@ def get_titles(name, titleing=False):
a lot of little hacks to make it better and for more control
'''

title = name.replace('.', ' ').replace('_', ' ')
title = title.strip().strip('(').strip('_').strip('-').strip().strip('_')

#make valid filename
title = re.sub('[\"\:\?\*\\\/\<\>\|]', ' ', name)
if titleing:
title = titler(title) # title the show name so it is in a consistant letter case

Expand Down Expand Up @@ -702,6 +711,10 @@ def add_movies_mapping(guess, mapping):
mapping.append(('%decade', decade))
mapping.append(('%0decade', decade_two))

# imdb
mapping.append(('%imdb', guess.get('imdb', '')))
mapping.append(('%cpimdb', guess.get('cpimdb', '')))

def add_dated_mapping(guess, mapping):

# title
Expand Down Expand Up @@ -753,22 +766,110 @@ def add_dated_mapping(guess, mapping):
mapping.append(('%d', day))
mapping.append(('%0d', day.rjust(2, '0')))

def guess_info(filename):
""" Parses the filename using guessit-library """
def os_path_split(path):
    """Split a path into the list of its components.

    Repeatedly applies os.path.split() until the path cannot be reduced
    any further. The irreducible remainder (for example a root such as
    '/') is kept as the first component when it is not empty.

    Empty components, which os.path.split() yields for a trailing
    separator ('a/b/'), are dropped so that the last element is always
    a real name.

    Returns a list of components ordered from outermost to innermost;
    an empty path gives an empty list.
    """
    parts = []
    while True:
        head, tail = os.path.split(path)
        if head == path:
            # Nothing more can be split off: keep a non-empty remainder.
            if path:
                parts.append(path)
            break
        if tail:
            parts.append(tail)
        path = head
    # Components were collected innermost-first.
    parts.reverse()
    return parts

def deobfuscate_path(filename):
start = os.path.dirname(download_dir)
new_name = filename[len(start)+1:]
if verbose:
print('stripped filename: %s' % new_name)

use_nzbname = force_nzbname
parts = os_path_split(new_name)
if verbose:
print(parts)

if not use_nzbname:
fn = os.path.splitext(os.path.basename(filename))[0]
part_removed = 0
for x in range(0, len(parts)-1):
fn = parts[x]
if fn.find('.')==-1 and fn.find('_')==-1 and fn.find(' ')==-1:
print("Detected obfuscated filename %s, using NZB-Name instead" % os.path.basename(filename))
use_nzbname = True

if use_nzbname:
guessfilename = os.path.join(os.path.dirname(filename), os.path.basename(download_dir)) + os.path.splitext(filename)[1]
print('Detected obfuscated directory name %s, removing from guess path' % fn)
parts[x] = None
part_removed += 1

fn = os.path.splitext(parts[len(parts)-1])[0]
if fn.find('.')==-1 and fn.find('_')==-1 and fn.find(' ')==-1:
print('Detected obfuscated filename %s, removing from guess path' % os.path.basename(filename))
parts[len(parts)-1] = '-' + os.path.splitext(filename)[1]
part_removed += 1

if part_removed < len(parts):
new_name = ''
for x in range(0, len(parts)):
if parts[x] != None:
new_name = os.path.join(new_name, parts[x])
else:
guessfilename = filename
print("All file path parts are obfuscated, using obfuscated NZB-Name")
new_name = os.path.basename(download_dir) + os.path.splitext(filename)[1]

return new_name

def remove_year(title):
    """Remove a year (19xx or 20xx) from a series name, if present.

    A year in parentheses, e.g. "(2012)", is preferred; otherwise a bare
    year is looked for. Both patterns require at least one character in
    front of the year, and because the prefix is matched greedily the
    last such year in the title is the one that is found. Only the
    matched occurrence is cut out, so the same digits elsewhere in the
    name (e.g. at its very start) are kept.

    Returns the title with the year removed and surrounding whitespace
    stripped, or the unchanged title when no year was found.
    """
    # Raw strings keep the regex escapes (\(, \d) out of the hands of the
    # string-literal parser.
    m = re.search(r'..*(\((19|20)\d\d\))', title)
    if not m:
        m = re.search(r'..*((19|20)\d\d)', title)
    if m:
        if verbose:
            print('Removing year from series name')
        # Cut by position rather than str.replace(), which would delete
        # every occurrence of the matched text.
        title = (title[:m.start(1)] + title[m.end(1):]).strip()
    return title

def apply_dnzb_headers(guess):
    """Override guessed values with data from the DNZB headers, if present.

    Reads the module-level values dnzb_proper_name, dnzb_episode_name,
    dnzb_movie_year and dnzb_more_info and updates guess in place:

    - ProperName becomes guess['series'] for series (with the year
      removed when series_year is off) and guess['title'] otherwise;
    - EpisodeName becomes guess['title'], for series only;
    - MovieYear becomes guess['year'];
    - MoreInfo, when guess['type'] is 'movie' and the value is an IMDb
      title link, provides guess['imdb'] and guess['cpimdb'].

    The 'vtype' and 'type' keys of guess are read without a default.
    In verbose mode the resulting guess is printed if any header was
    applied.
    """
    dnzb_used = False

    if dnzb_proper_name != '':
        dnzb_used = True
        if verbose:
            print('Using DNZB-ProperName')
        if guess['vtype'] == 'series':
            proper_name = dnzb_proper_name
            if not series_year:
                proper_name = remove_year(proper_name)
            guess['series'] = proper_name
        else:
            guess['title'] = dnzb_proper_name

    if dnzb_episode_name != '' and guess['vtype'] == 'series':
        dnzb_used = True
        if verbose:
            print('Using DNZB-EpisodeName')
        guess['title'] = dnzb_episode_name

    if dnzb_movie_year != '':
        dnzb_used = True
        if verbose:
            print('Using DNZB-MovieYear')
        guess['year'] = dnzb_movie_year

    # NOTE(review): this branch tests guess['type'] while the branches
    # above test guess['vtype'] - confirm that this is intended.
    if dnzb_more_info != '' and guess['type'] == 'movie':
        # Dots are escaped so that only the real host matches; http and
        # https, an optional "www." and an optional trailing slash are
        # all accepted.
        regex = re.compile(
            r'^https?://(?:www\.)?imdb\.com/title/(tt[0-9]+)/?$',
            re.IGNORECASE)
        matches = regex.match(dnzb_more_info.strip())
        if matches:
            # Report the header as used only when an ID was extracted.
            dnzb_used = True
            if verbose:
                print('Using DNZB-MoreInfo')
            guess['imdb'] = matches.group(1)
            guess['cpimdb'] = 'cp(' + guess['imdb'] + ')'

    if verbose and dnzb_used:
        print(guess.nice_string())

def guess_info(filename):
""" Parses the filename using guessit-library """

guessfilename = deobfuscate_path(filename)
if verbose:
print('Guessing: %s' % guessfilename)

Expand Down Expand Up @@ -818,6 +919,9 @@ def guess_info(filename):
elif guess['type'] == 'episode':
guess['vtype'] = 'series'

if dnzb_headers:
apply_dnzb_headers(guess)

if verbose:
print('Type: %s' % guess['vtype'])

Expand All @@ -829,7 +933,7 @@ def construct_path(filename):
if verbose:
print("filename: %s" % filename)

guess = guess_info(filename);
guess = guess_info(filename)
type = guess.get('vtype')
mapping = []
add_common_mapping(filename, guess, mapping)
Expand Down
Empty file modified lib/guessit/ISO-3166-1_utf8.txt
100755 → 100644
Empty file.
Empty file modified lib/guessit/ISO-639-2_utf-8.txt
100755 → 100644
Empty file.
2 changes: 1 addition & 1 deletion lib/guessit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from __future__ import unicode_literals

__version__ = '0.7-dev'
__version__ = '0.7.dev0'
__all__ = ['Guess', 'Language',
'guess_file_info', 'guess_video_info',
'guess_movie_info', 'guess_episode_info']
Expand Down
6 changes: 4 additions & 2 deletions lib/guessit/fileutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,14 @@ def split_path(path):
result = []
while True:
head, tail = os.path.split(path)
headlen = len(head)

# on Unix systems, the root folder is '/'
if head == '/' and tail == '':
if head and head == '/'*headlen and tail == '':
return ['/'] + result

# on Windows, the root folder is a drive letter (eg: 'C:\') or for shares \\
if ((len(head) == 3 and head[1:] == ':\\') or (len(head) == 2 and head == '\\\\')) and tail == '':
if ((headlen == 3 and head[1:] == ':\\') or (headlen == 2 and head == '\\\\')) and tail == '':
return [head] + result

if head == '' and tail == '':
Expand All @@ -61,6 +62,7 @@ def split_path(path):
path = head
continue

# otherwise, add the last path fragment and keep splitting
result = [tail] + result
path = head

Expand Down
2 changes: 1 addition & 1 deletion lib/guessit/guess.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def choose_string(g1, g2):
elif v1l in v2l:
return (v1, combined_prob)

# in case of conflict, return the one with highest priority
# in case of conflict, return the one with highest confidence
else:
if c1 > c2:
return (v1, c1 - c2)
Expand Down
13 changes: 9 additions & 4 deletions lib/guessit/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from __future__ import unicode_literals
from guessit import PY3, u, base_text_type
from guessit.matchtree import MatchTree
from guessit.textutils import normalize_unicode
from guessit.textutils import normalize_unicode, clean_string
import logging

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -62,9 +62,9 @@ def __init__(self, filename, filetype='autodetect', opts=None):
(for more info, see guess.matchtree.to_string)
Second, it tries to merge all this information into a single object
containing all the found properties, and does some (basic) conflict
resolution when they arise.
Second, it tries to merge all this information into a single object
containing all the found properties, and does some (basic) conflict
resolution when they arise.
"""

valid_filetypes = ('autodetect', 'subtitle', 'video',
Expand All @@ -84,6 +84,11 @@ def __init__(self, filename, filetype='autodetect', opts=None):
opts = opts.split()

self.match_tree = MatchTree(filename)

# sanity check: make sure we don't process a (mostly) empty string
if clean_string(filename) == '':
return

mtree = self.match_tree
mtree.guess.set('type', filetype, confidence=1.0)

Expand Down
2 changes: 1 addition & 1 deletion lib/guessit/patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
#(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<bonusNumber>(?:-?[xX-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)),

# ... 2x13 ...
(r'[^0-9](?P<season>[0-9]{1,2})[^0-9]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)),
(r'[^0-9](?P<season>[0-9]{1,2})[^0-9 .-]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)),

# ... s02 ...
#(sep + r's(?P<season>[0-9]{1,2})' + sep, 0.6, (1, -1)),
Expand Down
8 changes: 5 additions & 3 deletions lib/guessit/slogging.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,15 @@
RESET_FONT = "\x1B[0m"


def setupLogging(colored=True, with_time=False, with_thread=False, filename=None):
def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False):
"""Set up a nice colored logger as the main application logger."""

class SimpleFormatter(logging.Formatter):
def __init__(self, with_time, with_thread):
self.fmt = (('%(asctime)s ' if with_time else '') +
'%(levelname)-8s ' +
'[%(name)s:%(funcName)s]' +
'[%(name)s:%(funcName)s' +
(':%(lineno)s' if with_lineno else '') + ']' +
('[%(threadName)s]' if with_thread else '') +
' -- %(message)s')
logging.Formatter.__init__(self, self.fmt)
Expand All @@ -47,7 +48,8 @@ class ColoredFormatter(logging.Formatter):
def __init__(self, with_time, with_thread):
self.fmt = (('%(asctime)s ' if with_time else '') +
'-CC-%(levelname)-8s ' +
BLUE_FONT + '[%(name)s:%(funcName)s]' +
BLUE_FONT + '[%(name)s:%(funcName)s' +
(':%(lineno)s' if with_lineno else '') + ']' +
RESET_FONT + ('[%(threadName)s]' if with_thread else '') +
' -- %(message)s')

Expand Down
5 changes: 0 additions & 5 deletions lib/guessit/transfo/guess_episodes_rexps.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,6 @@ def guess_episodes_rexps(string):
span = (match.start() + span_adjust[0],
match.end() + span_adjust[1])

# episodes which have a season > 30 are most likely errors
# (Simpsons is at 24!)
if int(guess.get('season', 0)) > 30:
continue

# decide whether we have only a single episode number or an
# episode list
if guess.get('episodeNumber'):
Expand Down
Loading

0 comments on commit a3ee92c

Please sign in to comment.