-
Notifications
You must be signed in to change notification settings - Fork 1
/
parse_tree.py
executable file
·1670 lines (1547 loc) · 87.6 KB
/
parse_tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
##############################################
# Author: -- (--)
# Description:
# Parse tree (dependency and constituency parsing)
##############################################
import logging
import re
import anytree.cachedsearch as search
from anytree import AnyNode, RenderTree, PostOrderIter
class PlaceQuestionParseTree:
    """Constituency parse tree of a place question, stored as an ``anytree`` tree.

    Every node is an ``AnyNode`` with the attributes ``name`` (covered text),
    ``nodeType`` (constituency / POS tag), ``role`` (semantic label, ``''``
    while unlabelled) and ``spans`` (a ``{'start': ..., 'end': ...}`` dict).

    NOTE(review): this class was re-indented from a copy of the file whose
    leading whitespace had been lost. Places where the nesting could not be
    derived unambiguously are marked ``NOTE(review)`` and should be compared
    with the pristine source.
    """

    # Prepositions that are treated as spatio-temporal relations.
    spatiotemporal_propositions = ['in', 'of', 'on', 'at', 'within', 'from', 'to', 'near', 'close', 'between', 'beside',
                                   'by', 'since', 'until', 'before', 'after', 'close to', 'near to', 'closest to',
                                   'nearest to']
    # Regex prefixes which, followed by 'of' / 'to' / 'from', form a complex spatial relation.
    # Raw strings: '\d' in a normal string literal is an invalid escape sequence.
    complex_spatial_propositions = [r' within \d.* ',
                                    r' at most \d.* ',
                                    r' less than \d.* away ',
                                    r' more than \d.* away ',
                                    r' in \d.* radius ',
                                    r' in a range of \d.* ',
                                    r' in the range of \d.* ',
                                    ' north ', ' south ', ' east ', ' west ', ' part ',
                                    ' northeast ', ' southeast ', ' northwest ', ' southwest ']

    def __init__(self, parse_dict):
        """Keep *parse_dict* and immediately build the tree from it.

        :param parse_dict: nested mapping with ``'word'`` and ``'nodeType'``
            entries and an optional ``'children'`` list of the same shape.
        """
        self.parse_dict = parse_dict
        self.tree = None
        self.root = None
        self.construct_tree()

    def construct_tree(self):
        """Build the ``AnyNode`` tree from ``self.parse_dict`` and set ``self.root`` / ``self.tree``."""
        root = AnyNode(name=self.parse_dict['word'], nodeType=self.parse_dict['nodeType'], role='',
                       spans={'start': 0, 'end': len(self.parse_dict['word'])})
        if 'children' in self.parse_dict:
            for child in self.parse_dict['children']:
                self.add_to_tree(child, root)
        self.root = root
        self.tree = RenderTree(root)

    def add_to_tree(self, node, parent):
        """Recursively attach the dict *node* (and its children) below the tree node *parent*."""
        word = node['word']
        # NOTE(review): ``find`` yields the first occurrence (or -1), so a word that is repeated in,
        # or missing from, the parent's text gets offsets that do not point at this occurrence.
        local_start = parent.name.find(word)
        start = parent.spans['start'] + local_start
        n = AnyNode(name=word, nodeType=node['nodeType'], parent=parent, role='',
                    spans={'start': start, 'end': start + len(word)})
        if 'children' in node:
            for child in node['children']:
                self.add_to_tree(child, n)

    def render(self):
        """Refresh ``self.tree`` so that it renders the current ``self.root``."""
        self.tree = RenderTree(self.root)

    def __repr__(self):
        """Return one line per node: ``<prefix><name> (<nodeType>) {<role>}``."""
        if self.tree is None:
            return "Empty Tree"
        return "".join("%s%s (%s) {%s}\n" % (pre, node.name, node.nodeType, node.role)
                       for pre, _fill, node in self.tree)

    def label_tree(self):
        """Clean the tree, label conjunctions, non-place objects and numbers, then update names.

        :return: the merged span dictionaries returned by the three labelling steps.
        """
        self.clean_tree()
        res = self.label_conjunctions()
        res = {**res, **self.label_non_platial_objects()}
        res = {**res, **self.label_numbers()}
        self.update()
        return res

    def find_node_by_exact_name(self, string):
        """Return all nodes whose ``name`` equals *string*."""
        return search.findall_by_attr(self.root, string)

    def find_node_by_name(self, string):
        """Return nodes named exactly *string*; failing that, nodes named like any single word of it."""
        res = self.find_node_by_exact_name(string)
        if len(res) > 0:
            return res
        words = string.split()  # split once instead of once per visited node
        return search.findall(self.root, filter_=lambda node: node.name in words)

    def label_role(self, name, role, clean=False, question_words=False, comparison=False):
        """Assign *role* to the node that represents *name*.

        With exactly one matching node the role is set on it (``nodeType``
        becomes ``'WH'`` when *question_words*, its children are dropped when
        *clean*). Otherwise the shallowest match not named ``'of'`` is kept,
        renamed to *name* and retyped (``'WH'`` / ``'JJR'`` / ``'NP'``); every
        other visited match is detached from the tree.
        """
        nodes = self.find_node_by_name(name)
        if len(nodes) == 1:
            node = nodes[0]
            node.role = role
            if question_words:
                node.nodeType = 'WH'
            if clean:
                node.children = []
            return
        min_depth = 1000
        selected = None
        for node in nodes:
            if node.depth < min_depth and node.name not in ['of']:
                min_depth = node.depth
                selected = node
            else:
                node.parent = None
        if selected is None:
            # Nothing usable was found (no match at all, or only 'of' nodes): there is nothing to label.
            logging.warning('no node found to label %r as %r', name, role)
            return
        selected.name = name
        start = self.root.name.index(name)
        selected.spans = {'start': start, 'end': start + len(name)}
        if question_words:
            selected.nodeType = 'WH'
        elif comparison:
            selected.nodeType = 'JJR'
        else:
            selected.nodeType = 'NP'
        selected.role = role

    def clean_tree(self):
        """Fold labelled nodes into their parent when the only sibling is a determiner or shares the role."""
        named_objects = search.findall(self.root, filter_=lambda node: node.role in ("E", "P", "e", "p", "d", "o"))
        for named_object in named_objects:
            siblings = named_object.siblings
            if len(siblings) != 1:
                continue
            parent = named_object.parent
            if siblings[0].nodeType == 'DT':
                # The parent takes over the object's identity and loses both children.
                parent.role = named_object.role
                parent.name = named_object.name
                parent.nodeType = named_object.nodeType
                parent.children = []
            elif siblings[0].role == named_object.role:
                parent.role = named_object.role

    def label_spatiotemporal_relationships(self):
        """Label prepositions next to place / date nodes as relations (``'R'`` / ``'r'``).

        :return: dict keyed ``'<text>--<start>'`` with ``start``, ``end``, ``role`` and ``pos`` entries.
        """
        named_objects = search.findall(self.root, filter_=lambda node: node.role in ("P", "p", "d"))
        res_relationships = {}
        for named_object in named_objects:
            for sibling in named_object.siblings:
                if not (sibling.nodeType == 'IN' and named_object.parent.nodeType in ['PP', 'VP'] and
                        sibling.name in PlaceQuestionParseTree.spatiotemporal_propositions):
                    continue
                plain_key = sibling.name + '--' + str(sibling.spans['start'])
                if named_object.role == 'd':
                    sibling.role = 'r'
                    res_relationships.setdefault(plain_key, {
                        'start': sibling.spans['start'], 'end': sibling.spans['end'], 'role': 'r', 'pos': 'ADP'})
                else:  # complex spatial relationship with ['of', 'from', 'to']
                    sibling.role = 'R'
                    if sibling.name in ['of', 'to', 'from']:
                        for reg in PlaceQuestionParseTree.complex_spatial_propositions:
                            pattern = reg + sibling.name
                            regex_search = re.search(pattern, self.root.name)
                            if regex_search is not None:
                                start, end = regex_search.span()
                                res_relationships[self.root.name[start:end] + '--' + str(start)] = {
                                    'start': start, 'end': end, 'role': 'R', 'pos': 'ADP'}
                                self.label_complex_spatial_relationships(sibling, pattern)
                            else:
                                res_relationships.setdefault(plain_key, {
                                    'start': sibling.spans['start'], 'end': sibling.spans['end'],
                                    'role': 'R', 'pos': 'ADP'})
                    else:
                        res_relationships.setdefault(plain_key, {
                            'start': sibling.spans['start'], 'end': sibling.spans['end'],
                            'role': 'R', 'pos': 'ADP'})
                # NOTE(review): nesting inferred - assumed to run for every matched preposition
                # (date and place alike); it may originally have belonged to the 'R' branch only.
                named_object.parent.role = 'LOCATION'
        return res_relationships

    def all_encodings(self):
        """Return ``{role: [node names]}`` for every node that has a non-empty role."""
        res = {}
        for labelled in search.findall(self.root, filter_=lambda node: node.role != ''):
            res.setdefault(labelled.role, []).append(labelled.name)
        return res

    def label_complex_spatial_relationships(self, prep, pattern):
        """Group the nodes that spell out a complex spatial relation under one new ``'R'`` node.

        Starting at the parent of *prep*, climbs the tree until *pattern*
        (stripped) matches a node's text. If the match is the whole node the
        node itself becomes ``'R'``; otherwise the matching nodes are moved
        below a new ``IN`` node that is inserted among the context's children.
        """
        stripped = pattern.strip()
        context = prep.parent
        while True:
            regex_search = re.search(stripped, context.name)
            if regex_search is not None or context.parent is None:
                break
            context = context.parent
        if regex_search is None:
            return
        text = regex_search.group(0)
        if context.name == text:
            context.role = 'R'
            return
        nodes = PlaceQuestionParseTree.iterate_and_find(context, text)
        if not nodes:
            # The matched text could not be mapped back onto nodes; leave the tree untouched.
            return
        new_node = AnyNode(name=text, nodeType='IN', role='R',
                           spans={'start': nodes[0].spans['start'], 'end': nodes[-1].spans['end']})
        before = []
        after = []
        firstparent = nodes[0].parent
        if firstparent != context:
            for child in context.children:
                if self.root.name.index(child.name) + len(child.name) <= self.root.name.index(text):
                    before.append(child)
        # NOTE(review): nesting inferred - this loop is assumed to run whether or not
        # ``firstparent`` is the context itself.
        for child in firstparent.children:
            if child in nodes:
                break
            before.append(child)
        lastparent = prep.parent
        for child in lastparent.children:
            if child not in nodes:
                after.append(child)
        while lastparent != context:
            lastparent = lastparent.parent
            for child in lastparent.children:
                if self.root.name.index(text) + len(text) <= self.root.name.index(child.name):
                    after.append(child)
        # Rebuild the context's children in order: before, the new relation node, after.
        context.children = []
        for b in before:
            b.parent = context
        for node in nodes:
            node.parent = new_node
        new_node.parent = context
        for a in after:
            a.parent = context

    @staticmethod
    def iterate_and_find(node, text):
        """Collect, depth first, the descendants of *node* whose names occur in *text*.

        A child whose name is contained in the remaining *text* is taken and
        its first occurrence is removed from *text*; other children are
        searched recursively while non-blank text remains.
        """
        res = []
        for child in node.children:
            if child.name in text:
                res.append(child)
                text = text.replace(child.name, '', 1)
            elif text.strip() != '':
                res.extend(PlaceQuestionParseTree.iterate_and_find(child, text))
        return res

    def label_complex_comparison(self, reg_results, comparison, role):
        """Merge the two words of *comparison* into one node carrying *role*.

        :param reg_results: regex match whose ``regs[0]`` span locates the comparison in the text.
        :param comparison: two-word comparison phrase.
        :param role: role assigned to the merged node.
        """
        span_start, span_end = reg_results.regs[0]
        contexts = search.findall(self.root, filter_=lambda node: node.spans['start'] <= span_start and
                                  node.spans['end'] >= span_end)
        context = None
        vals = comparison.split()
        max_depth = -1
        for c in contexts:  # '>=' keeps the last of the deepest covering nodes
            if c.depth >= max_depth:
                context = c
                max_depth = c.depth
        first = search.findall(context, filter_=lambda node: node.name == vals[0])[0]
        siblings = first.parent.children
        position = siblings.index(first)
        if position + 1 == len(siblings):
            return
        if siblings[position + 1].role not in ['p', 'e', 'o']:
            return
        second = search.findall(context, filter_=lambda node: node.name == vals[1])[0]
        if first.parent != second.parent:
            second.parent.name = second.parent.name.replace(second.name, '').strip()
        # NOTE(review): nesting inferred - only the rename above is assumed to depend on the
        # parents being different.
        second.parent = None
        first.parent.name = first.parent.name + ' ' + second.name
        first.parent.spans = {'start': first.parent.spans['start'], 'end': second.spans['end']}
        first.name = comparison
        first.role = role

    def clean_locations(self):
        """When exactly two ``'LOCATION'`` nodes exist, merge them under one new node."""
        named_objects = search.findall(self.root, filter_=lambda node: node.role == 'LOCATION')
        if len(named_objects) != 2:
            return
        loc0, loc1 = named_objects
        loc0_first_in_text = self.root.name.index(loc0.name) < self.root.name.index(loc1.name)
        if loc0.depth < loc1.depth:
            PlaceQuestionParseTree.merge(node1=loc0, node2=loc1, order=loc0_first_in_text)
        else:
            PlaceQuestionParseTree.merge(node1=loc1, node2=loc0, order=not loc0_first_in_text)

    def clean_phrases(self):
        """Collapse single-child nodes and clear place roles that no descendant supports."""
        single_child_nodes = search.findall(self.root, filter_=lambda node: len(node.children) == 1)
        for node in single_child_nodes:
            try:
                only_child = node.children[0]
                if node.role == '':
                    node.role = only_child.role
                    node.nodeType = only_child.nodeType
                # NOTE(review): nesting inferred - the collapse is assumed to happen even when the
                # node already has a role of its own.
                children = only_child.children
                only_child.parent = None
                node.children = children
            except Exception:
                # An earlier collapse may already have emptied this node; keep going (best effort).
                print('error in cleaning...')
        incorrect_types = search.findall(self.root, filter_=lambda node: len(node.children) > 0 and
                                         node.role in ['p', 'P'])
        for it in incorrect_types:
            if len(search.findall(it, filter_=lambda node: node != it and node.role in ['p', 'P'])) == 0:
                it.role = ''

    @staticmethod
    def merge(node1, node2, order=True):
        """Put *node1* and *node2* under a new node attached to *node1*'s parent.

        The new node copies ``nodeType`` and ``role`` from *node1*, spans both
        nodes, and is named ``node1 node2`` when *order* is true, otherwise
        ``node2 node1`` (the children are attached in that same order).
        """
        start = min(node1.spans['start'], node2.spans['start'])
        end = max(node1.spans['end'], node2.spans['end'])
        leading, trailing = (node1, node2) if order else (node2, node1)
        node = AnyNode(name=leading.name + ' ' + trailing.name, nodeType=node1.nodeType, role=node1.role,
                       spans={'start': start, 'end': end})
        node.parent = node1.parent
        leading.parent = node
        trailing.parent = node

    def update(self):
        """Rebuild inner node names from their children and collapse same-typed unary chains."""
        for node in PostOrderIter(self.root):
            if len(node.children) == 0:
                continue
            # Compare against the stripped text; the previous check used a string with a trailing
            # space and therefore never matched.
            name = ' '.join(child.name for child in node.children).strip()
            if node.name != name:
                node.name = name
            if len(node.children) == 1:
                only_child = node.children[0]
                if (node.role == '' or node.role == only_child.role) and node.nodeType == only_child.nodeType:
                    node.role = only_child.role
                    node.children = only_child.children

    def label_non_platial_objects(self):
        """Label unlabelled noun leaves as objects (``'o'``) or situations (``'s'``).

        A parent whose children are all objects, generic places or determiners
        becomes a single ``'o'`` leaf itself.

        :return: dict keyed ``'<text>--<start>'`` describing the labelled spans.
        """
        npos = search.findall(self.root, filter_=lambda node: node.nodeType.startswith('N') and
                              node.role == '' and len(node.children) == 0)
        res = {}
        for npo in npos:
            if npo.name in ['border', 'cross', 'crosses', 'borders', 'flow', 'flows']:
                npo.role = 's'
            else:
                npo.role = 'o'
        for npo in npos:
            parent = npo.parent
            if parent is None:
                continue
            all_objects = all(child.role in ('o', 'p') or child.nodeType == 'DT' for child in parent.children)
            if all_objects:
                parent.role = 'o'
                parent.children = []
                res[parent.name + '--' + str(parent.spans['start'])] = {'start': parent.spans['start'],
                                                                        'end': parent.spans['end'],
                                                                        'role': 'o',
                                                                        'pos': 'NOUN'}
            else:
                res[npo.name + '--' + str(npo.spans['start'])] = {'start': npo.spans['start'],
                                                                  'end': npo.spans['end'],
                                                                  'role': npo.role,
                                                                  'pos': 'NOUN'}
        return res

    def get_verbs(self):
        """Return the names of all single-word nodes whose type starts with ``'VB'``."""
        verb_nodes = search.findall(self.root,
                                    filter_=lambda node: node.nodeType.startswith("VB") and ' ' not in node.name)
        return [node.name for node in verb_nodes]

    def label_situation_activities(self, verbs, decisions):
        """Apply per-verb *decisions* as roles and extend them to neighbouring prepositional phrases.

        :param verbs: verb names, in the order produced by :meth:`get_verbs`.
        :param decisions: one role per verb; ``'u'`` means "do not label".
        :return: dict keyed ``'<text>--<start>'`` for the verbs that were labelled.
        """
        res = {}
        verb_nodes = search.findall(self.root,
                                    filter_=lambda node: node.nodeType.startswith("VB") and node.name in verbs)
        # zip instead of indexing: a tree that changed since get_verbs() no longer raises IndexError.
        for _verb, node, decision in zip(verbs, verb_nodes, decisions):
            if decision != 'u' and node.name not in ['is', 'are', 'do', 'does', 'be', 'was', 'were', 'located']:
                node.role = decision
                res[node.name + '--' + str(node.spans['start'])] = {'start': node.spans['start'],
                                                                    'end': node.spans['end'],
                                                                    'role': node.role, 'pos': 'VERB'}
            else:
                print("this verb is suspicious: " + str(node.name))
        situations = search.findall(self.root, filter_=lambda node: node.role == 's')
        for situation in situations:
            for sibiling in situation.siblings:
                if sibiling.role == '' and sibiling.nodeType == 'PP':
                    if len(search.findall(sibiling, filter_=lambda node: node.role in ('e', 'o', 'E'))) > 0:
                        sibiling.role = 's'
        activities = search.findall(self.root, filter_=lambda node: node.role == 'a')
        for activity in activities:
            for sibiling in activity.siblings:
                if sibiling.role == '' and sibiling.nodeType == 'PP':
                    # ``role in ('o')`` was a substring test on the string 'o' and also accepted the
                    # empty role, so every PP qualified; compare for equality instead.
                    if len(search.findall(sibiling, filter_=lambda node: node.role == 'o')) > 0:
                        sibiling.role = 'a'
        return res

    def label_events_actions(self):
        """Mark unlabelled verb phrases as ``'EVENT'`` or ``'ACTION'`` according to their children."""
        nodes = search.findall(self.root,
                               filter_=lambda node: node.nodeType.startswith("V") and 'P' in node.nodeType and
                               node.role == '')
        for node in nodes:
            has_action = any(child.role == 'a' for child in node.children)
            has_event = any(child.role in ('e', 'E') for child in node.children)
            if has_event and not has_action:
                node.role = 'EVENT'
            elif has_action and not has_event:
                node.role = 'ACTION'

    def label_numeric_values(self):
        """Give every unlabelled ``CD`` leaf the role ``'n'``."""
        nodes = search.findall(self.root, filter_=lambda node: node.nodeType == 'CD' and node.role == '' and
                               len(node.children) == 0)
        for node in nodes:
            node.role = 'n'

    def label_conjunctions(self):
        """Label conjunction leaves (``'&'``, ``'|'``, ``'!'``) and propagate roles to their parents.

        Commas found below the parent of a conjunction inherit its role. When
        all other labelled nodes below that parent share a single role, the
        parent receives that role too.

        :return: dict keyed ``'<text>--<start>'`` for labelled conjunctions and commas.
        """
        res = {}
        try:
            nodes = search.findall(self.root, filter_=lambda node: node.nodeType in ('CC', 'IN', 'SCONJ', 'CCONJ')
                                   and node.role == '' and len(node.children) == 0)
            for node in nodes:
                pos = None  # stays None for leaves that are not conjunctions (e.g. plain prepositions)
                if node.name in ['and', 'both']:
                    node.role, pos = '&', 'CCONJ'
                elif node.name in ['or', 'whether']:
                    node.role, pos = '|', 'CCONJ'
                elif node.name in ['not', 'neither', 'nor', 'but', 'except']:
                    node.role, pos = '!', 'SCONJ'
                if pos is not None:
                    res[node.name + '--' + str(node.spans['start'])] = {'start': node.spans['start'],
                                                                        'end': node.spans['end'],
                                                                        'role': node.role,
                                                                        'pos': pos}
                siblings = search.findall(node.parent, filter_=lambda n: n.role not in ('&', '|', '!', 'q') and
                                          n.nodeType != 'DT' and (n.role != '' or n.nodeType == ','))
                sibling_roles = set()
                for sibling in siblings:
                    if sibling.nodeType == ',':
                        if pos is None:
                            continue
                        sibling.role = node.role
                        # The former lookup ``res[node.name]`` used a key that never exists (keys are
                        # '<name>--<start>'), so any comma aborted the whole method.
                        res[sibling.name + '--' + str(sibling.spans['start'])] = {
                            'start': sibling.spans['start'], 'end': sibling.spans['end'], 'role': sibling.role,
                            'pos': pos}
                    else:
                        sibling_roles.add(sibling.role)
                if len(sibling_roles) == 1:
                    node.parent.role = list(sibling_roles)[0]
            # NOTE(review): nesting inferred - assumed to run once after all conjunctions are handled.
            self.update()
        except Exception:
            logging.exception('error in finding conjunctions...')
        return res

    def label_numbers(self):
        """Label unlabelled ``CD`` nodes as numbers (``'n'``) and pair them with a following unit.

        :return: dict keyed ``'<text>--<start>'`` for the numbers / measures found.
        """
        numbers = search.findall(self.root, filter_=lambda node: node.role == '' and node.nodeType == 'CD')
        units = {}
        for num in numbers:
            num.role = 'n'
            check = False
            added = False
            for sibling in num.parent.children:
                if sibling == num:
                    check = True
                elif check and sibling.name in PlaceDependencyTree.UNITS:
                    if num.parent.role == '':
                        num.parent.role = 'MEASURE'
                    if num.name + ' ' + sibling.name in self.root.name:
                        units[num.name + ' ' + sibling.name + '--' + str(num.spans['start'])] = {
                            'start': num.spans['start'],
                            'end': sibling.spans['end'] + 1,
                            'role': 'n',
                            'pos': 'NUM'}
                        added = True
            if not added and num.parent.nodeType == 'QP' and num.parent.parent is not None:
                # The unit may follow the quantifier phrase instead of the number itself.
                found = False
                for child in num.parent.parent.children:
                    if child == num.parent:
                        found = True
                    elif found and child.name in PlaceDependencyTree.UNITS:
                        measure = num.name + ' ' + child.name
                        measure_start = self.root.name.index(measure)
                        new_node = AnyNode(child.parent, role='MEASURE', name=measure, nodeType='NP',
                                           spans={'start': measure_start, 'end': measure_start + len(measure)})
                        num.parent = new_node
                        child.parent = new_node
                        units[new_node.name + '--' + str(new_node.spans['start'])] = {
                            'start': new_node.spans['start'],
                            'end': new_node.spans['end'],
                            'role': 'n',
                            'pos': 'NUM'}
            else:
                # NOTE(review): nesting inferred - this ``else`` is assumed to pair with the
                # ``if not added ...`` test above (it could also have been a ``for ... else``).
                units[num.name + '--' + str(num.spans['start'])] = {'start': num.spans['start'],
                                                                    'end': num.spans['end'],
                                                                    'role': 'n',
                                                                    'pos': 'NUM'}
        return units

    def label_qualities(self):
        """Label adjective nodes as qualities and return the span dict of what was labelled."""
        compounds = {}
        adjectives = search.findall(self.root, filter_=lambda node: node.nodeType.startswith('AD'))
        for adj in adjectives:
            if len(search.findall(adj, filter_=lambda node: node.nodeType in ['CC', 'NP', 'NNS', 'NN'])) == 0:
                compounds = {**compounds, **PlaceQuestionParseTree.label_adjective_roles(adj)}
        # A root node has no parent; skip it instead of failing on ``None.role``.
        other_adjectives = search.findall(self.root,
                                          filter_=lambda node: node.nodeType.startswith('J') and
                                          node.parent is not None and node.parent.role == '')
        for adj in other_adjectives:
            compounds = {**compounds, **PlaceQuestionParseTree.label_adjective_roles(adj)}
        return compounds

    @staticmethod
    def label_adjective_roles(adj):
        """Derive the role of adjective *adj* from the siblings that follow it.

        A preceding ``RBS`` sibling is merged into the adjective; a following
        noun decides between ``'q'`` and ``'Q'``; a following preposition (or
        prepositional phrase) absorbs the adjective and is marked ``'<>'``
        when the merged text ends with ``'than'``.

        :return: dict keyed ``'<text>--<start>'`` describing what was labelled.
        """
        compounds = {}
        if adj.parent is None:
            # Detached (or root) adjective: there are no siblings to inspect.
            return compounds
        found = False
        for child in adj.parent.children:
            if not found and adj.nodeType.startswith('J') and child.nodeType == 'RBS':
                if child.name + ' ' + adj.name in adj.parent.name:
                    adj.name = child.name + ' ' + adj.name
                    adj.nodeType = 'JJS'
                    child.parent = None
            if child == adj:
                found = True
            elif found and child.nodeType.startswith('N'):
                if child.role in ['o', 'e', 'E']:
                    adj.role = 'q'
                elif child.role in ['p', 'P']:
                    adj.role = 'Q'
                else:
                    print('unresolved adjective! ' + adj.name + ' ' + child.name)
                compounds[adj.name + '--' + str(adj.spans['start'])] = {'start': adj.spans['start'],
                                                                        'end': adj.spans['end'],
                                                                        'role': adj.role, 'pos': 'ADJ'}
                break
            elif found and child.nodeType in ['PP', 'IN']:
                # NOTE(review): nesting inferred in this branch - the ``compounds`` entries are assumed
                # to be recorded whether or not the merged text ends with 'than'.
                if child.nodeType == 'IN':
                    adj.parent = None
                    child.name = adj.name + ' ' + child.name
                    if child.name.endswith('than'):
                        child.role = '<>'
                    compounds[child.name + '--' + str(child.spans['start'])] = {'start': child.spans['start'],
                                                                                'end': child.spans['end'],
                                                                                'role': child.role, 'pos': 'ADJ'}
                elif child.nodeType == 'PP' and child.children[0].nodeType == 'IN':
                    prep = child.children[0]
                    if adj.parent is not None and len(adj.parent.children) == 2:
                        child.parent = adj.parent
                        child.name = adj.name + ' ' + child.name
                        child.spans = {'start': adj.spans['start'], 'end': child.spans['end']}
                    adj.parent = None
                    prep.name = adj.name + ' ' + prep.name
                    prep.spans = {'start': adj.spans['start'], 'end': prep.spans['end']}
                    if prep.name.endswith('than'):
                        prep.role = '<>'
                    compounds[prep.name + '--' + str(prep.spans['start'])] = {
                        'start': prep.spans['start'], 'end': prep.spans['end'],
                        'role': prep.role, 'pos': 'ADJ'}
                else:
                    # NOTE(review): nesting inferred - assumed to report a PP whose first child is not IN.
                    print('unresolved adjective ' + adj.name + ' ' + child.name)
        return compounds

    @staticmethod
    def context_builder(list_str, node):
        """Return ``True`` when every string in *list_str* occurs in ``node.name``."""
        return all(string in node.name for string in list_str)

    def search_context(self, list_str):
        """Return the deepest node whose text contains every string of *list_str*, or ``None``."""
        nodes = search.findall(self.root, filter_=lambda node: PlaceQuestionParseTree.context_builder(list_str, node))
        # max() keeps the first of equally deep nodes, like the strict '>' comparison it replaces.
        return max(nodes, key=lambda node: node.depth, default=None)

    def apply_dependencies(self, dependencies):
        """Sort *dependencies* by kind and hand each group to its ``apply_*`` method."""
        verb_deps = []
        cc_deps = []
        adj_noun_deps = []
        complex_prep = []
        comparisons = []
        units = []
        for dependency in dependencies:
            relation = dependency.relation
            if relation.link == 'HAS/RELATE' and 'VERB' in dependency.arg1.attributes and (
                    'NOUN' in dependency.arg2.attributes or 'PROPN' in dependency.arg2.attributes):
                verb_deps.append(dependency)
            elif relation.link == 'IS/ARE' and relation.name == 'ADJ':
                adj_noun_deps.append(dependency)
            elif relation.link == 'IS/ARE' and relation.name == 'PRP':
                complex_prep.append(dependency)
            elif relation.name == 'UNIT':
                units.append(dependency)
            elif relation.attributes is not None:
                if 'CCONJ' in relation.attributes or 'SCONJ' in relation.attributes:
                    cc_deps.append(dependency)
                elif relation.name != 'RELATION' and 'ADJ' in relation.attributes:
                    comparisons.append(dependency)
        print('Complex Prepositions:')
        self.apply_complex_relationships_dependencies(complex_prep)
        print('Verb-Noun Relationships:')
        self.apply_verb_noun_dependencies(verb_deps)
        print('Conjunctions:')
        self.apply_conjunction_dependencies(cc_deps)
        print('Adjective-Noun Relationships:')
        self.apply_adj_noun_dependencies(adj_noun_deps)
        print('Comparisons:')
        self.apply_comparison_dependencies(comparisons)
        print('Units:')
        self.apply_unit_dependencies(units)

    def apply_verb_noun_dependencies(self, dependencies):
        """Print the deepest node that contains both arguments of each dependency."""
        for dep in dependencies:
            print(self.search_context([dep.arg1.name, dep.arg2.name]))

    def apply_complex_relationships_dependencies(self, dependencies):
        """Print the deepest node that contains both arguments of each dependency."""
        for dep in dependencies:
            print(self.search_context([dep.arg1.name, dep.arg2.name]))

    def apply_conjunction_dependencies(self, dependencies):
        """Print the deepest node that contains the relation and both arguments of each dependency."""
        for dep in dependencies:
            print(self.search_context([dep.relation.name, dep.arg1.name, dep.arg2.name]))

    def apply_adj_noun_dependencies(self, dependencies):
        """Print the deepest node that contains both arguments of each dependency."""
        for dep in dependencies:
            print(self.search_context([dep.arg1.name, dep.arg2.name]))

    def apply_unit_dependencies(self, dependencies):
        """Print the deepest node that contains both arguments of each dependency."""
        for dep in dependencies:
            print(self.search_context([dep.arg1.name, dep.arg2.name]))

    def apply_comparison_dependencies(self, dependencies):
        """Restructure the tree so both arguments of each comparison hang below its relation node."""
        for dep in dependencies:
            str_list = [dep.relation.name, dep.arg1.name, dep.arg2.name]
            context = self.search_context(str_list)
            if context is None:
                logging.warning('no context found for comparison %s', str_list)
                continue
            firsts = search.findall(context, filter_=lambda node: dep.arg1.name in node.name and node != context)
            seconds = search.findall(context, filter_=lambda node: dep.arg2.name in node.name and node != context)
            first = PlaceQuestionParseTree.valid_node_selection(firsts, ['NN', 'NNS', 'NP', 'NPS'],
                                                                ['VB', 'VP', 'VBZ'])
            second = PlaceQuestionParseTree.valid_node_selection(seconds, ['NN', 'NNS', 'NP', 'NPS'],
                                                                 ['VB', 'VP', 'VBZ'])
            if first is None or second is None:
                logging.warning('could not resolve the arguments of comparison %s', str_list)
                continue
            relation = PlaceQuestionParseTree.find_exact_match(context, dep.relation.name)
            print(first)
            print(second)
            print(relation)
            if relation is None or relation.parent is None:
                logging.warning('could not resolve the relation node of comparison %s', str_list)
                continue
            first.parent = relation
            second.parent = relation
            relation.role = 'COMPARISON'
            relation.parent.children = [relation]
            relation.parent.name = ' '.join([first.name, relation.name, second.name])
        # NOTE(review): nesting inferred - assumed to run once after all comparisons are applied.
        self.clean_tree()

    @staticmethod
    def valid_node_selection(nodes, valid_pos_tags, invalid_tags):
        """Pick the deepest node of type *valid_pos_tags* that has no descendant typed *invalid_tags*.

        A single candidate is returned unchecked; ``None`` is returned when nothing qualifies.
        """
        if len(nodes) == 1:
            return nodes[0]
        max_depth = -1
        selected = None
        for node in nodes:
            invalid_child = search.findall(node, filter_=lambda child: child != node and child.nodeType in invalid_tags)
            if len(invalid_child) == 0 and node.nodeType in valid_pos_tags and max_depth < node.depth:
                max_depth = node.depth
                selected = node
        return selected

    @staticmethod
    def find_exact_match(context, name):
        """Return the shallowest node below *context* named *name*, with its children removed.

        Returns ``None`` when no node matches.
        """
        matches = search.findall(context, filter_=lambda node: node.name == name)
        selected = min(matches, key=lambda node: node.depth, default=None)
        if selected is not None:
            selected.children = []
        return selected
class Dependency:
    """A relation that links one argument, or two, plus optional extra items."""

    def __init__(self, node1, relation, node2=None):
        """Store the relation and its arguments; ``extra`` starts out empty.

        :param node1: first argument.
        :param relation: the relation itself.
        :param node2: optional second argument (``None`` for a unary dependency).
        """
        self.arg1, self.relation, self.arg2 = node1, relation, node2
        self.extra = []

    def is_binary(self):
        """Return ``True`` when a second argument is present."""
        return self.arg2 is not None

    def __repr__(self):
        """Render the relation, then its arguments and extras on indented lines."""
        pieces = ['\n' + str(self.relation) + ':\n\t' + str(self.arg1)]
        if self.is_binary():
            pieces.append('\n\t' + str(self.arg2))
        # NOTE(review): the source had lost its indentation; extras are assumed to be
        # listed for unary dependencies as well.
        pieces.extend('\n\t\t' + str(ex) for ex in self.extra)
        return ''.join(pieces)
class FOLGenerator:
CONCEPTS = {'P': 'PLACE', 'E': 'EVENT', 'L': 'LOCATION', 'd': 'DATE'}
SPECIAL_CHARS = {'and': 8743, 'or': 8744, 'not': 172, 'implies': 8658, 'universal': 8704, 'existential': 8707}
# e.g., result = chr(SPECIAL_CHARS['existential'])+' x0: Place(Tehran) '
# +chr(SPECIAL_CHARS['and'])+' IN(x0, Tehran) '+chr(SPECIAL_CHARS['and'])+' City(x0)'
def __init__(self, cons_tree, dep_tree):
self.cons = cons_tree
self.dep = dep_tree
self.dependencies = {}
self.variables = {}
self.constants = []
self.dep_places = []
self.rels = {'property': [], 'spatial': []}
def generate_dependencies(self):
self.dependencies['intent'] = self.extract_intent_dependency()
# order -- declaration, conjunction, spatial relationships, qualities, comparison
self.dependencies['declaration'] = []
self.declare()
self.dependencies['criteria'] = []
self.extract_conjunctions()
self.extract_property_relationships()
self.extract_quality_relations()
self.extract_spatiotemporal_relationships()
self.extract_situations()
self.extract_comparisons()
return self.dependencies
def declare(self):
    """Append a ``DECLARE`` dependency for every specific and every generic node of the dependency tree.

    Nodes with role ``P`` / ``E`` / ``d`` are recorded as constants and tied to
    their concept name; nodes with role ``p`` / ``o`` / ``e`` each receive a
    fresh variable ``x0``, ``x1``, ... that is stored in ``self.variables``.
    """
    def declare_relation():
        return AnyNode(name='DECLARE', spans=[{}], attributes=None, link='IS', nodeType='RELATION')

    declarations = self.dependencies['declaration']

    for specific in search.findall(self.dep.root, filter_=lambda n: n.role in ['P', 'E', 'd']):
        subject = PlaceDependencyTree.clone_node_without_children(specific)
        self.constants.append(specific.name)
        relation = declare_relation()
        concept = AnyNode(name=FOLGenerator.CONCEPTS[specific.role], spans=[{}], attributes=None,
                          link=specific.name, nodeType='CONCEPT')
        declarations.append(Dependency(subject, relation, concept))

    generics = search.findall(self.dep.root, filter_=lambda n: n.role in ['p', 'o', 'e'])
    for var_id, generic in enumerate(generics):
        var_name = 'x' + str(var_id)
        subject = PlaceDependencyTree.clone_node_without_children(generic)
        relation = declare_relation()
        variable = AnyNode(name=var_name, spans=[{}], attributes=None,
                           link=PlaceDependencyTree.preprocess_names(generic.name), nodeType='VARIABLE')
        declarations.append(Dependency(subject, relation, variable))
        self.variables[subject.name] = var_name
def extract_intent_dependency(self):
    """Build the ``INTENT`` dependency from the question word and the thing that is asked for.

    The earliest ``WH`` node of the constituency tree is the question word (a
    synthetic ``'what'`` node with role ``'1'`` is used when there is none).
    For role ``'8'`` a unary intent is returned. Otherwise the target is picked
    among the object / generic-place / ACTION / EVENT / SITUATION nodes
    (falling back to specific places), preferring the earliest and, on equal
    start, the deepest node.

    :return: a one-element list holding the intent ``Dependency``.
    """
    def intent_relation(link):
        return AnyNode(name='INTENT', spans=[{}], attributes=None, link=link, nodeType='RELATION')

    wh_nodes = search.findall(self.cons.root, filter_=lambda n: n.nodeType == 'WH')
    selected = None
    if len(wh_nodes) == 1:
        selected = wh_nodes[0]
    elif len(wh_nodes) > 1:
        earliest = 1000
        for wh in wh_nodes:
            if wh.spans['start'] < earliest:
                earliest, selected = wh.spans['start'], wh
    if selected is None:
        selected = AnyNode(name='what', spans=[{}], attributes=None, link='IS/ARE', nodeType='WH', role='1')

    first = PlaceDependencyTree.clone_node_without_children(selected, cons_tree=True)
    if selected.role == '8':
        return [Dependency(node1=first, relation=intent_relation('IS/ARE'))]

    candidates = search.findall(self.cons.root,
                                filter_=lambda n: n.role in ['o', 'p', 'ACTION', 'EVENT', 'SITUATION'])
    if len(candidates) == 0:
        candidates = search.findall(self.cons.root, filter_=lambda n: n.role == 'P')

    what = None
    if len(candidates) == 1:
        what = candidates[0]
    else:
        deepest = -1
        earliest = 1000
        for cand in candidates:
            cand_start = cand.spans['start']
            if cand_start < earliest:
                # Specific places are skipped for WH-typed question words or outside a PP.
                skip = cand.role == 'P' and (selected.nodeType.startswith('WH') or
                                             cand.parent.nodeType != 'PP')
                if not skip:
                    what, earliest, deepest = cand, cand_start, cand.depth
            elif cand_start == earliest and cand.depth > deepest:
                what, deepest = cand, cand.depth
    if what is None:
        what = candidates[0]
    second = PlaceDependencyTree.clone_node_without_children(what, cons_tree=True)

    if selected.role == '1':  # where questions
        link = 'LOCATION'
    elif selected.role == '6':  # how many
        link = 'COUNT'
    else:
        if ' ' in first.name:
            first.name = first.name.split()[1]
        link = first.name.upper()
    return [Dependency(node1=first, relation=intent_relation(link), node2=second)]
def print_dependencies(self):
    """Print every dependency group and return the same listing as text.

    For each ``(key, value)`` pair of ``self.dependencies`` the key is
    printed, followed by a ``value:`` header and ``str(value)``.

    Returns:
        A string holding, per pair, the key on one line and ``str(value)``
        on the next (the ``value:`` header is printed only, not returned).
    """
    chunks = []
    for label, deps in self.dependencies.items():
        print(label)
        chunks.append(label + '\n')
        rendered = str(deps)
        print('value: \n' + rendered)
        chunks.append(rendered + '\n')
    return ''.join(chunks)
def print_logical_form(self):
    """Build, print and return the logical-form string of the dependencies.

    The string is assembled in three stages:

    1. intent  -- a head built from the first intent dependency plus the
       intents produced by ``apply_conjunction_intent``; when non-empty it
       is terminated with ``': '``.
    2. declarations -- one predicate per entry of
       ``self.dependencies['declaration']``, each followed by the
       conjunction symbol.
    3. criteria -- every non ``AND/OR`` criterion rendered through
       ``generate_FOL_criterion``.

    Finally every key of ``self.variables`` found in the text is replaced
    by its mapped value.

    Side effects:
        Extends ``self.dependencies['intent']`` with the conjunction
        intents, calls ``self.apply_conjunction_criteria()``, and prints
        the result followed by an empty line.
        NOTE(review): the intent list is extended on every call, so
        calling this twice presumably accumulates duplicates -- confirm.

    Returns:
        The final logical-form string.
    """
    primary = self.dependencies['intent'][0]
    conjoined = self.apply_conjunction_intent()
    self.dependencies['intent'].extend(conjoined)

    # ---- intent ----
    head_terms = []
    role = primary.arg1.role
    if role == '8':
        if primary.arg2 is not None:
            quantifier = chr(FOLGenerator.SPECIAL_CHARS['existential'])
            head_terms.append(quantifier + ' ' + primary.arg2.name)
            for extra_intent in conjoined:
                head_terms.append(extra_intent.arg2.name)
    elif role in ('6', '1'):  # how many, where
        head_terms.append(primary.relation.link + '(' + primary.arg2.name + ')')
        for extra_intent in conjoined:
            head_terms.append(primary.relation.link + '(' + extra_intent.arg2.name + ')')
    else:
        head_terms.append(primary.arg2.name)
        for extra_intent in conjoined:
            head_terms.append(extra_intent.arg2.name)
    logical_form = ', '.join(head_terms)
    if logical_form != '':
        logical_form += ': '

    # ---- declarations ----
    # Conjunction symbol plus a blank: the separator placed after each predicate.
    conjunction = chr(FOLGenerator.SPECIAL_CHARS['and']) + ' '
    for declaration in self.dependencies['declaration']:
        declared = declaration.arg2
        if declared.nodeType == 'VARIABLE':
            predicate = declared.link.replace(' ', '_').upper() + '(' + declared.name + ') '
        else:
            predicate = declared.name + '(' + declaration.arg1.name + ') '
        logical_form += predicate + conjunction

    # ---- criteria ----
    self.apply_conjunction_criteria()
    criteria = self.dependencies['criteria']
    # The 1-based position counts AND/OR entries too, so only the physically
    # last list element can be rendered with last=True.
    for position, criterion in enumerate(criteria, start=1):
        if criterion.relation.link == 'AND/OR':
            continue
        if position == len(criteria):
            logical_form = self.generate_FOL_criterion(criterion, logical_form, last=True)
        else:
            logical_form = self.generate_FOL_criterion(criterion, logical_form)

    # Drop a dangling conjunction separator (two characters: symbol + blank).
    if logical_form.endswith(conjunction):
        logical_form = logical_form[:-2]

    for key, var in self.variables.items():
        logical_form = logical_form.replace(key, var)

    print(logical_form)
    print()
    return logical_form
def generate_FOL_criterion(self, criterion, logical_form, last=False):
    """Append the rendering of one criterion to ``logical_form``.

    Args:
        criterion: dependency exposing ``relation.link``, ``relation.name``,
            ``arg1``, ``arg2`` (may be ``None`` except for superlatives) and,
            for links other than PROPERTY/NOT/SUPERLATIVE, an iterable
            ``extra`` of named nodes.
        logical_form: text built so far.
        last: when true no conjunction separator is appended afterwards.

    Returns:
        The updated logical-form text.

    Rendering by ``relation.link``:
        * ``SUPERLATIVE`` whose ``arg1`` name is a key of ``self.variables``
          and whose role is ``'p'``: the first occurrence of that name in
          the existing text is wrapped as ``SUPERLATIVE_NAME(name)``;
          nothing is appended.
        * other ``SUPERLATIVE``: ``SUPERLATIVE_NAME(arg1)`` is appended.
        * ``PROPERTY`` / ``NOT``: ``RELATION(arg1[, arg2])`` is appended.
        * anything else: ``RELATION(arg1[, arg2][, extras...])`` is
          appended, then the upper-cased, underscore-joined extra names are
          removed from the whole text.
    """
    link = criterion.relation.link

    if link == 'SUPERLATIVE':
        subject = criterion.arg1.name
        if subject in self.variables and criterion.arg1.role == 'p':
            wrapped = criterion.arg2.name.replace(' ', '_').upper() + '(' + subject + ')'
            logical_form = logical_form.replace(subject, wrapped, 1)
            if last:
                # NOTE(review): blindly drops the final two characters,
                # presumably a trailing conjunction separator -- confirm the
                # text always ends with one here.
                logical_form = logical_form[:-2]
            # Nothing was appended, so no separator is added either.
            return logical_form
        logical_form += criterion.arg2.name.replace(' ', '_').upper() + '(' + subject + ') '
    else:
        call = criterion.relation.name.upper().replace(' ', '_') + '(' + criterion.arg1.name
        if criterion.arg2 is not None:
            call += ', ' + criterion.arg2.name
        if link in ('PROPERTY', 'NOT'):
            logical_form += call + ') '
        else:  # other
            extra_names = [ex.name for ex in criterion.extra]
            call += ''.join(', ' + extra_name for extra_name in extra_names)
            logical_form += call + ') '
            redundant = ''.join(extra_name + ' ' for extra_name in extra_names)
            logical_form = logical_form.replace(redundant.upper().replace(" ", "_"), '')

    if not last:
        logical_form += chr(FOLGenerator.SPECIAL_CHARS['and']) + ' '
    return logical_form
def apply_conjunction_intent(self):
    """Derive extra intents for nodes joined to the intent target by AND/OR.

    Looks at the first intent dependency; when it has no ``arg2`` there is
    nothing to expand. Otherwise every ``AND/OR`` criterion that names the
    intent's ``arg2`` on either side yields a new ``Dependency`` whose
    ``arg2`` is a childless clone of the intent target renamed to the node
    on the other side of the conjunction.

    Returns:
        A list of the newly built dependencies (possibly empty). The
        existing dependencies are not modified here.
    """
    primary = self.dependencies['intent'][0]
    target = primary.arg2
    if target is None:
        return []

    target_name = target.name
    expanded = []
    for criterion in self.dependencies['criteria']:
        if criterion.relation.link != 'AND/OR':
            continue
        # Pick the conjunct on the opposite side of the intent target.
        if criterion.arg1.name == target_name:
            partner = criterion.arg2
        elif criterion.arg2.name == target_name:
            partner = criterion.arg1
        else:
            continue
        conjunct = PlaceDependencyTree.clone_node_without_children(target)
        conjunct.name = partner.name
        head = PlaceDependencyTree.clone_node_without_children(primary.arg1)
        expanded.append(Dependency(head, primary.relation, conjunct))
    return expanded
def check_valid_relationships(self, first, second):
    """Tell whether the pair ``first``/``second`` is absent from the known relations.

    The pair is keyed as ``"<first.name>-<second.name>"`` and looked up in
    ``self.rels['spatial']`` and then ``self.rels['property']``.

    Returns:
        ``False`` when the key is present in either collection, ``True``
        otherwise.
    """
    pair_key = first.name + '-' + second.name
    # Short-circuits: the 'property' collection is consulted only when the
    # key is not among the spatial ones.
    return pair_key not in self.rels['spatial'] and pair_key not in self.rels['property']
def apply_conjunction_criteria(self):
criteria = self.dependencies['criteria']
and_or_criteria = []
for criterion in criteria:
if criterion.relation.link == 'AND/OR':
and_or_criteria.append(criterion)
if len(and_or_criteria) == 0:
return
new_criteria = []
for criterion in criteria:
if criterion.relation.link == 'AND/OR':
continue
for ao in and_or_criteria:
first = None
relation = None
second = None
found = False
if criterion.arg1.name == ao.arg1.name:
first = PlaceDependencyTree.clone_node_without_children(criterion.arg1)
first.name = ao.arg2.name
found = True
elif criterion.arg2.name == ao.arg1.name:
second = PlaceDependencyTree.clone_node_without_children(criterion.arg2)
second.name = ao.arg2.name
found = True
elif criterion.arg1.name == ao.arg2.name:
first = PlaceDependencyTree.clone_node_without_children(criterion.arg1)
first.name = ao.arg1.name