Skip to content

Commit

Permalink
Date parsing (dbrgn#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
frinkelpi committed Oct 22, 2016
1 parent 15f8180 commit 3fd1350
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 59 deletions.
6 changes: 4 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,15 @@ Usage
de -- von, nach, via, ab, an
fr -- de, à, via, départ, arrivée

You can also use natural time specifications in your language, like "now",
"immediately", "noon" or "midnight".
You can also use natural time and date specifications in your language, like:
- "now", "immediately", "at noon", "at midnight",
- "tomorrow", "monday", "in 2 days", "22/11".

Examples:
fahrplan from thun to burgdorf
fahrplan via bern nach basel von zürich, helvetiaplatz ab 15:35
fahrplan de lausanne à vevey arrivée minuit
fahrplan from Bern to Zurich departure 13:00 monday
fahrplan -p proxy.mydomain.ch:8080 de lausanne à vevey arrivée minuit

.. image:: https://raw.github.com/dbrgn/fahrplan/master/screenshot.png
Expand Down
7 changes: 4 additions & 3 deletions fahrplan/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,15 @@ def main():
+ u' de -- von, nach, via, ab, an\n'
+ u' fr -- de, à, via, départ, arrivée\n'
+ u'\n'
+ u' You can also use natural time specifications in your language, like "now",\n'
+ u' "immediately", "noon" or "midnight".\n'
+ u' You can also use natural time and date specifications in your language, like:\n'
+ u' - "now", "immediately", "at noon", "at midnight",\n'
+ u' - "tomorrow", "monday", "in 2 days", "22/11".\n'
+ u'\n'
+ u'Examples:\n'
+ u' fahrplan from thun to burgdorf\n'
+ u' fahrplan via bern nach basel von zürich, helvetiaplatz ab 15:35\n'
+ u' fahrplan de lausanne à vevey arrivée minuit\n'
+ u' fahrplan from Bern to Zurich departure 13:00 monday\n'
+ u' fahrplan -p proxy.mydomain.ch:8080 de lausanne à vevey arrivée minuit\n'
+ u'\n', formatter_class=argparse.RawDescriptionHelpFormatter, prog=meta.title, description=meta.description, add_help=False)
parser.add_argument("--full", "-f", action="store_true", help="Show full connection info, including changes")
Expand Down Expand Up @@ -89,7 +91,6 @@ def main():
except ValueError as e:
perror('Error:', e)
sys.exit(1)

# 2. API request
data = getConnections(args, (output_format == Formats.FULL), proxy_host)
connections = data["connections"]
Expand Down
158 changes: 113 additions & 45 deletions fahrplan/parser.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,45 @@
# -*- coding: utf-8 -*-
from __future__ import print_function, division, absolute_import, unicode_literals

from datetime import datetime
from datetime import datetime, timedelta
import re
import logging

import six

# Natural-language time/date vocabulary, keyed by language code.
#
# Each language maps a fixed set of concept names to the words (or, for
# 'days', regular-expression patterns) that express that concept:
#   now / noon / midnight -- time-of-day words
#   today / tomorrow      -- date words
#   at                    -- filler words that may precede a time
#   days                  -- regex patterns with one capture group holding a
#                            number of days
#   weekdays              -- seven weekday names, listed Monday first so the
#                            list index lines up with datetime.weekday()
keywords = {
    'de': {
        'now': ['jetzt', 'sofort', 'nun'],
        'noon': ['mittag'],
        'midnight': ['mitternacht'],
        'today': ['heute'],
        'tomorrow': ['morgen'],
        'at': ['um', 'am'],
        'days': [r'in (\d+) tagen'],
        'weekdays': [
            'montag', 'dienstag', 'mittwoch', 'donnerstag', 'freitag',
            'samstag', 'sonntag',
        ],
    },
    'en': {
        'now': ['now', 'right now', 'immediately'],
        'noon': ['noon'],
        'midnight': ['midnight'],
        'today': ['today'],
        'tomorrow': ['tomorrow'],
        'at': ['at'],
        'days': [r'in (\d+) days'],
        'weekdays': [
            'monday', 'tuesday', 'wednesday', 'thursday', 'friday',
            'saturday', 'sunday',
        ],
    },
    'fr': {
        # 'maintenant' is the correct spelling; the historical misspelling
        # 'maitenant' is kept so that input which used to work still does.
        'now': ['maintenant', 'maitenant'],
        'noon': ['midi'],
        'midnight': ['minuit'],
        'today': ["aujourd'hui"],
        'tomorrow': ['demain'],
        'days': [r'dans (\d+) jours'],
        'at': [],  # TODO: "à" clashes with top level keywords
        'weekdays': [
            'lundi', 'mardi', 'mercredi', 'jeudi', 'vendredi', 'samedi',
            'dimanche',
        ],
    },
}


def _process_tokens(tokens, sloppy_validation=False):
"""Parse input tokens.
Expand Down Expand Up @@ -98,12 +131,65 @@ def intersection_count(a, b):
return language


def _parse_time(timestring, language):
def _parse_date(datestring, keywords):
"""Parse date tokens.
Args:
datestring: String containing a date specification.
keywords: Language keywords
Returns:
date string.
Raises:
ValueError: If time could not be parsed.
"""
date = None
days_shift = None
# Keywords
for i, d in enumerate(["today", "tomorrow"]):
if any([t in datestring for t in keywords[d]]):
days_shift = i
# Weekdays
for i, d in enumerate(keywords["weekdays"]):
if d in datestring:
days_shift = i - datetime.now().weekday()
if days_shift <= 0:
days_shift += 7
# Shifts
if days_shift == None:
for pattern in keywords["days"]:
days_re = re.search(pattern, datestring)
if days_re:
try:
days_shift = int(days_re.group(1))
except:
pass
if days_shift != None:
return datetime.now() + timedelta(days=days_shift)
# Regular date strings
for dateformat in [[r"(\d{2}/\d{2}/\d{4})", "%d/%m/%Y"], [r"(\d{2}/\d{2})", "%d/%m"]]:
days_re = re.search(dateformat[0], datestring)
if days_re:
try:
date = datetime.strptime(days_re.group(1), dateformat[1])
except:
continue
if date.year == 1900:
date = date.replace(year=datetime.now().year)
break
if date != None:
return date.strftime("%Y/%m/%d")
return None


def _parse_time(timestring, keywords):
"""Parse time tokens.
Args:
timestring: String containing a time specification.
language: The language string (e.g. 'en' or 'de').
keywords: Language keywords
Returns:
Time string.
Expand All @@ -113,49 +199,23 @@ def _parse_time(timestring, language):
"""

keywords = {
'de': {
'now': ['jetzt', 'sofort', 'nun'],
'noon': ['mittag'],
'midnight': ['mitternacht'],
'at': ['um', 'am'],
},
'en': {
'now': ['now', 'right now', 'immediately'],
'noon': ['noon'],
'midnight': ['midnight'],
'at': ['at'],
},
'fr': {
'now': ['maitenant'],
'noon': ['midi'],
'midnight': ['minuit'],
'at': [], # TODO: "à" clashes with top level keywords
},
}

try:
kws = keywords[language]
except IndexError:
raise ValueError('Invalid language: "%s"!' % language)

# Ignore "at" keywords
if timestring.split(' ', 1)[0] in kws['at']:
if timestring.split(' ', 1)[0] in keywords['at']:
timestring = timestring.split(' ', 1)[1]

# Parse regular time strings
regular_time_match = re.match(r'([0-2]?[0-9])[:\-\. ]([0-9]{2})', timestring)
# regular_time_match = re.search(r'([0-2]?[0-9])[:\-\. ]([0-9]{2})', timestring)
regular_time_match = re.search(r'(\d{2}):(\d{2})', timestring)
if regular_time_match:
return ':'.join(regular_time_match.groups())

if timestring.lower() in kws['now']:
timestring = timestring.lower()
if timestring in keywords['now']:
return datetime.now().strftime('%H:%M')
if timestring.lower() in kws['noon']:
if timestring in keywords['noon']:
return '12:00'
if timestring.lower() in kws['midnight']:
if timestring in keywords['midnight']:
return '23:59' # '00:00' would be the first minute of the day, not the last one.

raise ValueError('Time string "%s" could not be parsed.' % timestring)
raise ValueError('Time is missing or could not be parsed')


def parse_input(tokens):
Expand All @@ -179,17 +239,25 @@ def parse_input(tokens):
departure *and* arrival time are specified.
"""

# Process tokens, get data dict and language
data, language = _process_tokens(tokens)

if data == {}:
return data, language
try:
kws = keywords[language]
except IndexError:
raise ValueError('Invalid language: "%s"!' % language)
# Map keys
if 'departure' in data:
data['time'] = _parse_time(data['departure'], language)
del data['departure']
if 'arrival' in data:
data['time'] = _parse_time(data['arrival'], language)
data['isArrivalTime'] = 1
del data['arrival']
for t in ["departure", "arrival"]:
if t in data:
data["time"] = _parse_time(data[t], kws)
date = _parse_date(data[t], kws)
if date is not None:
data["date"] = date
if t == "arrival":
data['isArrivalTime'] = 1
del data[t]

logging.debug('Data: ' + repr(data))
return data, language
Expand Down
22 changes: 14 additions & 8 deletions fahrplan/tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from __future__ import print_function, division, absolute_import, unicode_literals

import sys
import datetime
from datetime import datetime, timedelta

#import envoy
from subprocess import Popen, PIPE
Expand Down Expand Up @@ -111,7 +111,7 @@ def testBasicArrivalTime(self):
self.assertEqual(expected, parser.parse_input(tokens)[0])

def testImmediateTimes(self):
now = datetime.datetime.now().strftime('%H:%M')
now = datetime.now().strftime('%H:%M')
queries = [
'von basel nach bern ab jetzt'.split(),
'von basel nach bern ab sofort'.split(),
Expand Down Expand Up @@ -154,7 +154,18 @@ def testAtTimes(self):
for tokens in queries:
data, _ = parser.parse_input(tokens)
self.assertEqual('12:00', data['time'])

def testDates(self):
    """Numeric dates are extracted next to the time, in German and English."""
    # The expected date uses the current year (a 'DD/MM' date has no year
    # of its own), so queries with an explicit year must be built from the
    # same value; a hard-coded year would only pass during that year.
    year = datetime.now().year
    queries = [
        'von basel nach bern ab 22/10/{} 13:00'.format(year).split(),
        'von basel nach bern ab um 22/10 13:00'.split(),
        'from basel to bern departure 22/10 13:00'.split(),
        'from basel to bern departure 22/10/{} 13:00'.format(year).split(),
    ]
    for tokens in queries:
        data, _ = parser.parse_input(tokens)
        self.assertEqual('13:00', data['time'])
        self.assertEqual('{}/10/22'.format(year), data['date'])

class TestBasicQuery(unittest.TestCase):

Expand Down Expand Up @@ -210,26 +221,21 @@ def testBasicQuery(self):


# class TestTablePrinter(unittest.TestCase):

# def setUp(self):
# self.output = StringIO()
# self.stdout = sys.stdout
# sys.stdout = self.output

# def tearDown(self):
# self.output.close()
# sys.stdout = self.stdout

# def testSeparator(self):
# printer = Tableprinter((3, 4, 5), ' ')
# printer.print_separator('*')
# self.assertEqual('******************\n', self.output.getvalue())

# def testPartialSeparator(self):
# printer = Tableprinter((2, 2, 3, 2), '+|+')
# printer.print_separator(cols=[1, 2])
# self.assertEqual(' +|+--+|+---+|+ +|+\n', self.output.getvalue())

# def testLine(self):
# printer = Tableprinter((4, 5, 6), '|')
# printer.print_line(('Eggs', 'Bacon', 'Spam'))
Expand Down
2 changes: 1 addition & 1 deletion test.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash
tox
flake8 --ignore E501,E128,W503 fahrplan/*.py
flake8 --ignore E501,E128,W503,E711 fahrplan/*.py

0 comments on commit 3fd1350

Please sign in to comment.