From ca4880589ced8f2c08ad548fec24536c26b802f6 Mon Sep 17 00:00:00 2001
From: Alistair Johnson <alistairewj@gmail.com>
Date: Tue, 19 Feb 2019 14:31:52 -0500
Subject: [PATCH 1/6] wrap long lines in ptb2ud.py for PEP 8 compliance

---
 negbio/pipeline/ptb2ud.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/negbio/pipeline/ptb2ud.py b/negbio/pipeline/ptb2ud.py
index 23118da..e7706ce 100644
--- a/negbio/pipeline/ptb2ud.py
+++ b/negbio/pipeline/ptb2ud.py
@@ -98,19 +98,22 @@ def __init__(self, lemmatizer, representation='CCprocessed', universal=False):
         Args:
             lemmatizer (Lemmatizer)
         """
-        super(NegBioPtb2DepConverter, self).__init__(lemmatizer, representation, universal)
+        super(NegBioPtb2DepConverter, self).__init__(
+            lemmatizer, representation, universal)
 
     def convert_doc(self, document):
         for passage in document.passages:
             for sentence in passage.sentences:
                 try:
-                    dependency_graph = self.convert(sentence.infons['parse tree'])
-                    anns, rels = convert_dg(dependency_graph, sentence.text, sentence.offset,
-                                            self.add_lemmas)
+                    dependency_graph = self.convert(
+                        sentence.infons['parse tree'])
+                    anns, rels = convert_dg(dependency_graph, sentence.text,
+                                            sentence.offset, self.add_lemmas)
                     sentence.annotations = anns
                     sentence.relations = rels
                 except:
-                    logging.exception("Cannot process sentence %d in %s", sentence.offset, document.id)
+                    logging.exception(
+                        "Cannot process sentence %d in %s", sentence.offset, document.id)
 
                 if not self.add_lemmas:
                     for ann in sentence.annotations:
@@ -188,8 +191,10 @@ def convert_dg(dependency_graph, text, offset, ann_index=0, rel_index=0, has_lem
         relation.infons['dependency'] = node.deprel
         if node.extra:
             relation.infons['extra'] = node.extra
-        relation.add_node(bioc.BioCNode('T{}'.format(annotation_id_map[node.index]), 'dependant'))
-        relation.add_node(bioc.BioCNode('T{}'.format(annotation_id_map[node.head]), 'governor'))
+        relation.add_node(bioc.BioCNode('T{}'.format(
+            annotation_id_map[node.index]), 'dependant'))
+        relation.add_node(bioc.BioCNode('T{}'.format(
+            annotation_id_map[node.head]), 'governor'))
         relations.append(relation)
         rel_index += 1
 

From 72dff0ed72ed866d062dd01f793f9f02c85ac6c8 Mon Sep 17 00:00:00 2001
From: Alistair Johnson <alistairewj@gmail.com>
Date: Tue, 19 Feb 2019 14:32:12 -0500
Subject: [PATCH 2/6] remove trailing comma that made CCprocessed a tuple (likely a bug)

---
 negbio/pipeline/ptb2ud.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/negbio/pipeline/ptb2ud.py b/negbio/pipeline/ptb2ud.py
index e7706ce..44873c3 100644
--- a/negbio/pipeline/ptb2ud.py
+++ b/negbio/pipeline/ptb2ud.py
@@ -51,7 +51,7 @@ class Ptb2DepConverter(object):
 
     basic = 'basic'
     collapsed = 'collapsed'
-    CCprocessed = 'CCprocessed',
+    CCprocessed = 'CCprocessed'
     collapsedTree = 'collapsedTree'
 
     def __init__(self, lemmatizer, representation='CCprocessed', universal=False):

From 21ffbb7cfa9e4329b16b7cd9b3583225e1188e7b Mon Sep 17 00:00:00 2001
From: Alistair Johnson <alistairewj@gmail.com>
Date: Tue, 19 Feb 2019 14:33:39 -0500
Subject: [PATCH 3/6] fix has_lemmas being interpreted as annotation index

---
 negbio/pipeline/ptb2ud.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/negbio/pipeline/ptb2ud.py b/negbio/pipeline/ptb2ud.py
index 44873c3..8faedd9 100644
--- a/negbio/pipeline/ptb2ud.py
+++ b/negbio/pipeline/ptb2ud.py
@@ -108,7 +108,8 @@ def convert_doc(self, document):
                     dependency_graph = self.convert(
                         sentence.infons['parse tree'])
                     anns, rels = convert_dg(dependency_graph, sentence.text,
-                                            sentence.offset, self.add_lemmas)
+                                            sentence.offset,
+                                            has_lemmas=self.add_lemmas)
                     sentence.annotations = anns
                     sentence.relations = rels
                 except:

From 47fcca070ff29f3f9f3d5614dc061b8ca053ce6e Mon Sep 17 00:00:00 2001
From: Alistair Johnson <alistairewj@gmail.com>
Date: Tue, 19 Feb 2019 14:34:36 -0500
Subject: [PATCH 4/6] re-raise KeyboardInterrupt instead of swallowing it

---
 negbio/pipeline/ptb2ud.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/negbio/pipeline/ptb2ud.py b/negbio/pipeline/ptb2ud.py
index 8faedd9..3e831c6 100644
--- a/negbio/pipeline/ptb2ud.py
+++ b/negbio/pipeline/ptb2ud.py
@@ -112,6 +112,8 @@ def convert_doc(self, document):
                                             has_lemmas=self.add_lemmas)
                     sentence.annotations = anns
                     sentence.relations = rels
+                except KeyboardInterrupt:
+                    raise
                 except:
                     logging.exception(
                         "Cannot process sentence %d in %s", sentence.offset, document.id)

From 419768ea5e3719dd04b4c988ae184752c5ca91ca Mon Sep 17 00:00:00 2001
From: Alistair Johnson <alistairewj@gmail.com>
Date: Tue, 19 Feb 2019 18:50:14 -0500
Subject: [PATCH 5/6] fix opaque errors if parsing of sentence was empty

---
 negbio/pipeline/parse.py  | 14 +++++++++++---
 negbio/pipeline/ptb2ud.py | 11 +++++++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/negbio/pipeline/parse.py b/negbio/pipeline/parse.py
index 030aad6..fa53c11 100644
--- a/negbio/pipeline/parse.py
+++ b/negbio/pipeline/parse.py
@@ -30,9 +30,13 @@ def parse(self, s):
         """
         if not s:
             raise ValueError('Cannot parse empty sentence: {}'.format(s))
+
         try:
             nbest = self.rrp.parse(str(s))
-            return nbest[0].ptb_parse
+            if nbest:
+                return nbest[0].ptb_parse
+            else:
+                return None
         except:
             raise ValueError('Cannot parse sentence: %s' % s)
 
@@ -53,7 +57,11 @@ def parse_doc(self, document):
                 try:
                     text = sentence.text
                     tree = self.parse(text)
-                    sentence.infons['parse tree'] = str(tree)
+                    if tree:
+                        sentence.infons['parse tree'] = str(tree)
+                    else:
+                        sentence.infons['parse tree'] = None
                 except:
-                    logging.exception('Cannot parse sentence: {}'.format(sentence.offset))
+                    logging.exception(
+                        'Cannot parse sentence: {}'.format(sentence.offset))
         return document
diff --git a/negbio/pipeline/ptb2ud.py b/negbio/pipeline/ptb2ud.py
index 3e831c6..ab34f84 100644
--- a/negbio/pipeline/ptb2ud.py
+++ b/negbio/pipeline/ptb2ud.py
@@ -104,6 +104,17 @@ def __init__(self, lemmatizer, representation='CCprocessed', universal=False):
     def convert_doc(self, document):
         for passage in document.passages:
             for sentence in passage.sentences:
+                # check for empty infons, don't process if empty
+                # this sometimes happens with poorly tokenized sentences
+                if not sentence.infons:
+                    logging.warning(
+                        "No parse information for sentence %d in %s", sentence.offset, document.id)
+                    continue
+                elif not sentence.infons['parse tree']:
+                    logging.warning(
+                        "No parse tree for sentence %d in %s", sentence.offset, document.id)
+                    continue
+
                 try:
                     dependency_graph = self.convert(
                         sentence.infons['parse tree'])

From 0f7f2935cbec370db905ddafcbd27cd8674d6e39 Mon Sep 17 00:00:00 2001
From: Alistair Johnson <alistairewj@gmail.com>
Date: Tue, 19 Feb 2019 18:53:41 -0500
Subject: [PATCH 6/6] remove try/except clauses from subfunctions

---
 negbio/pipeline/parse.py  | 29 ++++++++++++-----------------
 negbio/pipeline/ptb2ud.py |  4 ----
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/negbio/pipeline/parse.py b/negbio/pipeline/parse.py
index fa53c11..c84bda0 100644
--- a/negbio/pipeline/parse.py
+++ b/negbio/pipeline/parse.py
@@ -31,14 +31,11 @@ def parse(self, s):
         if not s:
             raise ValueError('Cannot parse empty sentence: {}'.format(s))
 
-        try:
-            nbest = self.rrp.parse(str(s))
-            if nbest:
-                return nbest[0].ptb_parse
-            else:
-                return None
-        except:
-            raise ValueError('Cannot parse sentence: %s' % s)
+        nbest = self.rrp.parse(str(s))
+        if nbest:
+            return nbest[0].ptb_parse
+
+        return None
 
 
 class NegBioParser(Bllip):
@@ -54,14 +51,13 @@ def parse_doc(self, document):
         """
         for passage in document.passages:
             for sentence in passage.sentences:
-                try:
-                    text = sentence.text
-                    tree = self.parse(text)
-                    if tree:
-                        sentence.infons['parse tree'] = str(tree)
-                    else:
-                        sentence.infons['parse tree'] = None
-                except:
+                text = sentence.text
+                tree = self.parse(text)
+                if tree:
+                    sentence.infons['parse tree'] = str(tree)
+                else:
+                    sentence.infons['parse tree'] = None
+                    # no active exception here, so warning() not exception()
-                    logging.exception(
-                        'Cannot parse sentence: {}'.format(sentence.offset))
+                    logging.warning(
+                        'No parse tree for sentence: %s', sentence.offset)
         return document
diff --git a/negbio/pipeline/ptb2ud.py b/negbio/pipeline/ptb2ud.py
index ab34f84..8f35265 100644
--- a/negbio/pipeline/ptb2ud.py
+++ b/negbio/pipeline/ptb2ud.py
@@ -107,12 +107,8 @@ def convert_doc(self, document):
                 # check for empty infons, don't process if empty
                 # this sometimes happens with poorly tokenized sentences
                 if not sentence.infons:
-                    logging.warning(
-                        "No parse information for sentence %d in %s", sentence.offset, document.id)
                     continue
                 elif not sentence.infons['parse tree']:
-                    logging.warning(
-                        "No parse tree for sentence %d in %s", sentence.offset, document.id)
                     continue
 
                 try: