Skip to content

Commit

Permalink
Merge pull request #20 from alistairewj/master
Browse files (browse the repository at this point in the history)
Fix errors and reformat ptb2ud.py
  • Loading branch information
Yifan Peng authored Feb 20, 2019
2 parents da4762f + 0f7f293 commit 7a2be81
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 17 deletions.
21 changes: 12 additions & 9 deletions negbio/pipeline/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ def parse(self, s):
"""
if not s:
raise ValueError('Cannot parse empty sentence: {}'.format(s))
try:
nbest = self.rrp.parse(str(s))

nbest = self.rrp.parse(str(s))
if nbest:
return nbest[0].ptb_parse
except:
raise ValueError('Cannot parse sentence: %s' % s)

return None


class NegBioParser(Bllip):
Expand All @@ -50,10 +51,12 @@ def parse_doc(self, document):
"""
for passage in document.passages:
for sentence in passage.sentences:
try:
text = sentence.text
tree = self.parse(text)
text = sentence.text
tree = self.parse(text)
if tree:
sentence.infons['parse tree'] = str(tree)
except:
logging.exception('Cannot parse sentence: {}'.format(sentence.offset))
else:
sentence.infons['parse tree'] = None
logging.exception(
'No parse tree for sentence: %s', sentence.offset)
return document
31 changes: 23 additions & 8 deletions negbio/pipeline/ptb2ud.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class Ptb2DepConverter(object):

basic = 'basic'
collapsed = 'collapsed'
CCprocessed = 'CCprocessed',
CCprocessed = 'CCprocessed'
collapsedTree = 'collapsedTree'

def __init__(self, lemmatizer, representation='CCprocessed', universal=False):
Expand Down Expand Up @@ -98,19 +98,32 @@ def __init__(self, lemmatizer, representation='CCprocessed', universal=False):
Args:
lemmatizer (Lemmatizer)
"""
super(NegBioPtb2DepConverter, self).__init__(lemmatizer, representation, universal)
super(NegBioPtb2DepConverter, self).__init__(
lemmatizer, representation, universal)

def convert_doc(self, document):
for passage in document.passages:
for sentence in passage.sentences:
# check for empty infons, don't process if empty
# this sometimes happens with poorly tokenized sentences
if not sentence.infons:
continue
elif not sentence.infons['parse tree']:
continue

try:
dependency_graph = self.convert(sentence.infons['parse tree'])
anns, rels = convert_dg(dependency_graph, sentence.text, sentence.offset,
self.add_lemmas)
dependency_graph = self.convert(
sentence.infons['parse tree'])
anns, rels = convert_dg(dependency_graph, sentence.text,
sentence.offset,
has_lemmas=self.add_lemmas)
sentence.annotations = anns
sentence.relations = rels
except KeyboardInterrupt:
raise
except:
logging.exception("Cannot process sentence %d in %s", sentence.offset, document.id)
logging.exception(
"Cannot process sentence %d in %s", sentence.offset, document.id)

if not self.add_lemmas:
for ann in sentence.annotations:
Expand Down Expand Up @@ -188,8 +201,10 @@ def convert_dg(dependency_graph, text, offset, ann_index=0, rel_index=0, has_lem
relation.infons['dependency'] = node.deprel
if node.extra:
relation.infons['extra'] = node.extra
relation.add_node(bioc.BioCNode('T{}'.format(annotation_id_map[node.index]), 'dependant'))
relation.add_node(bioc.BioCNode('T{}'.format(annotation_id_map[node.head]), 'governor'))
relation.add_node(bioc.BioCNode('T{}'.format(
annotation_id_map[node.index]), 'dependant'))
relation.add_node(bioc.BioCNode('T{}'.format(
annotation_id_map[node.head]), 'governor'))
relations.append(relation)
rel_index += 1

Expand Down

0 comments on commit 7a2be81

Please sign in to comment.