-
Notifications
You must be signed in to change notification settings - Fork 0
/
GetSubGraphPerPerson.py
143 lines (114 loc) · 3.94 KB
/
GetSubGraphPerPerson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
## Get and plot sub-graph from Theses main graph generated by GenerateGenealogyGraph
##
## Vincent Crocher - 2022 - 2024
import difflib
import pickle

import mpu
import networkx as nx
import pandas as pd
from rapidfuzz import fuzz, process
from wordcloud import WordCloud
## Extract a given field from a pandas DataFrame
def get_this(indf, field, wherethisone, isthis):
    """Return ``field`` from the first row where ``wherethisone`` equals ``isthis``.

    Args:
        indf: DataFrame to search.
        field: Name of the column whose value is returned.
        wherethisone: Name of the column used to select rows.
        isthis: Value that ``wherethisone`` must equal.

    Raises:
        IndexError: If no row matches.
    """
    matching_rows = indf[indf[wherethisone] == isthis]
    first_label = matching_rows.index[0]
    return matching_rows.at[first_label, field]
## Find closest name and associated id
def find_closest(search, candidates=None, id_to_name=None):
    """Return the id of the person whose name is the closest match to ``search``.

    Matching uses ``difflib.get_close_matches`` with a similarity cutoff of 0.4.

    Args:
        search: Name to look for.
        candidates: Iterable of candidate names. Defaults to the module-level
            ``search_l``.
        id_to_name: Dict mapping ids to names. Defaults to the module-level
            ``mapping``.

    Returns:
        The first id (in ``id_to_name`` iteration order) whose name equals the
        best match.

    Raises:
        IndexError: If no candidate is similar enough to ``search``, or the
            best match has no id in ``id_to_name``.
    """
    if candidates is None:
        candidates = search_l
    if id_to_name is None:
        id_to_name = mapping

    # Only the best match is used, so there is no need to ask for more.
    options = difflib.get_close_matches(search, candidates, n=1, cutoff=0.4)
    if not options:
        raise IndexError(f"no name close enough to {search!r}")
    best = options[0]

    for pid, name in id_to_name.items():
        if name == best:
            return pid
    raise IndexError(f"no id found for name {best!r}")
## Find closest possible names and associated ids
def find_closest_suggestions(search):
    """Return ids and names of the (at most 5) closest matches to ``search``.

    Candidates come from the module-level ``search_l`` and are ranked by
    ``rapidfuzz.process.extract``. Each suggested name is mapped back to the
    first id in the module-level ``mapping`` that carries this name.

    Returns:
        A ``(pids, suggs)`` pair of parallel lists: ids and matching names.
    """
    def first_id_named(label):
        # First key of `mapping` whose value is exactly this name.
        return [pid for pid, name in mapping.items() if name == label][0]

    suggs = [hit[0] for hit in process.extract(search, search_l, limit=5)]
    pids = [first_id_named(label) for label in suggs]
    return pids, suggs
## Word cloud of the thesis titles of all nodes of a graph
def words_cloud(sub_g):
    """Build a word cloud from thesis titles, print it as SVG and display it.

    For every node id in ``sub_g``, the ``TitreThese`` value of the matching
    ``ID`` row is read from the module-level ``people`` table. Titles are
    lower-cased and apostrophes are replaced by spaces before being merged
    into a single text.

    Args:
        sub_g: Iterable of node ids (e.g. a networkx graph).
    """
    # Words ignored when building the cloud.
    # NOTE: a comma was missing after "leur", which silently merged it with
    # 'contribution' into the single bogus stopword 'leurcontribution'.
    exclude_l = {
        '-', 'à', 'le', 'la', 'les', 'des', 'un', 'une', 'de', 'du', 'par',
        'pour', "d", "l", "et", "ou", "en", "sa", "son", "ses", "leur",
        'contribution', 'contributions', 'étude', 'études',
    }

    titles = []
    for n in sub_g:
        titre = people.loc[people['ID'] == n, 'TitreThese'].values[0]
        # Split elided articles (l', d', ...) from the following word.
        titles.append(titre.replace("'", ' ').lower())
    text = ' '.join(titles)

    wordcloud = WordCloud(stopwords=exclude_l, collocations=True, background_color="white").generate(text)
    print(wordcloud.to_svg())
    image = wordcloud.to_image()
    image.show()
## Subgraph around given node
def get_subgraph(start_node, mapping):
    """Build the labelled sub-graph surrounding ``start_node``.

    The result merges:
      * the BFS tree going downwards from ``start_node`` (nodes get the
        ``class`` attribute ``'etud'``),
      * the BFS tree going upwards from ``start_node`` (nodes get the
        ``class`` attribute ``'dir'``),
      * the downward BFS tree of every node found upwards.

    ``start_node`` itself gets the ``class`` attribute ``'auteur'``. A word
    cloud of the sub-graph is produced via ``words_cloud`` as a side effect.
    Nodes are finally relabelled through ``mapping`` and newlines in labels
    are escaped as a literal ``\\n``.

    Args:
        start_node: Id of the node of the module-level graph ``G`` to start from.
        mapping: Dict used to relabel node ids.

    Returns:
        The relabelled networkx directed graph.
    """
    Gd = nx.bfs_tree(G, start_node)  # Nodes downwards only
    nx.set_node_attributes(Gd, 'etud', name='class')
    Gu = nx.bfs_tree(G, start_node, reverse=True)  # Nodes upwards only
    nx.set_node_attributes(Gu, 'dir', name='class')
    # Gu edges follow the reversed traversal: flip them back before merging.
    G2 = nx.compose(Gd, Gu.reverse())

    # Merge the downward tree of EVERY upward node. The previous code rebuilt
    # the result from G2 at each iteration, so only the tree of the last
    # upward node was kept. The added trees carry no attributes, hence the
    # 'class' values set above are preserved.
    G3 = nx.compose_all([G2] + [nx.bfs_tree(G, gu) for gu in Gu])

    nx.set_node_attributes(G3, {start_node: 'auteur'}, name='class')
    words_cloud(G3)
    G3 = nx.relabel_nodes(G3, mapping, copy=False)
    G3 = nx.relabel_nodes(G3, lambda nom: nom.replace('\n', '\\n'), copy=False)
    return G3
## Local quick display
def draw_local(start_node, mapping):
    """Draw the sub-graph around ``start_node`` with networkx, using a 'dot' layout.

    Args:
        start_node: Id of the node the sub-graph is built around.
        mapping: Dict used by ``get_subgraph`` to relabel nodes.
    """
    sub_graph = get_subgraph(start_node, mapping)
    positions = nx.nx_agraph.graphviz_layout(sub_graph, prog='dot')
    # Labels only: nodes are drawn with zero size.
    draw_options = dict(arrows=None, with_labels=True, node_size=0, font_size=16)
    nx.draw(sub_graph, pos=positions, **draw_options)
## Format for agraph
def agraph_format(G2):
    """Convert a networkx graph to an AGraph laid out with 'dot'.

    Args:
        G2: networkx graph to convert.

    Returns:
        The laid-out AGraph, ready to be drawn.
    """
    dot_args = '-Nfontsize=10 -Nwidth=".2" -Nheight=".2" -Nmargin=0 -Gfontsize=8'
    agraph = nx.nx_agraph.to_agraph(G2)
    agraph.layout(prog='dot', args=dot_args)
    return agraph
## To neat PNG
def draw_png(start_node, mapping, filename='out.png'):
    """Render the sub-graph around ``start_node`` to a PNG file.

    Args:
        start_node: Id of the node the sub-graph is built around.
        mapping: Dict used by ``get_subgraph`` to relabel nodes.
        filename: Path of the image to write. Defaults to ``'out.png'``.
    """
    G2 = get_subgraph(start_node, mapping)
    A = agraph_format(G2)
    A.draw(filename)
## To neat SVG
def draw_svg(start_node, mapping, filename='out.svg'):
    """Render the sub-graph around ``start_node`` to an SVG file.

    Args:
        start_node: Id of the node the sub-graph is built around.
        mapping: Dict used by ``get_subgraph`` to relabel nodes.
        filename: Path of the image to write. Defaults to ``'out.svg'``.
    """
    G2 = get_subgraph(start_node, mapping)
    A = agraph_format(G2)
    A.draw(filename)
## To dot file
def draw_dot(start_node, mapping, filename='out.dot'):
    """Write the sub-graph around ``start_node`` to a Graphviz dot file.

    Args:
        start_node: Id of the node the sub-graph is built around.
        mapping: Dict used by ``get_subgraph`` to relabel nodes.
        filename: Path of the dot file to write. Defaults to ``'out.dot'``.
    """
    G2 = get_subgraph(start_node, mapping)
    nx.nx_agraph.write_dot(G2, filename)
## Load data
## Each step prints the CPU time (in seconds) it took.
import time


def _timed(step):
    """Run ``step()``, print the CPU time it consumed and return its result."""
    began = time.process_time()
    outcome = step()
    print(time.process_time() - began)
    return outcome


def _load_graph():
    """Unpickle the main theses graph."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('ThesesAssocGraph.gpickle', 'rb') as graph_file:
        return pickle.load(graph_file)


G = _timed(_load_graph)
mapping = _timed(lambda: mpu.io.read('ThesesMapping.pickle'))
people = _timed(lambda: mpu.io.read('ThesesPeople.pickle'))

#clean mapping list from NaNs
search_l = _timed(lambda: list(mapping.values()))
#search_l = {i.split('\n')[0] for i in search_l if type(i)==str} #TODO: but separate mapping and search_l
search_l = _timed(lambda: {i for i in search_l if type(i) == str})
## Testing
## Manual test: look up a name, print the suggestions, then draw a sub-graph.
Key='Pierre Bourdieu'
start_nodes, sug=find_closest_suggestions(Key)
# Show the closest names found for Key
print(sug)
# Quick test by node id: the hard-coded id below REPLACES the ids returned
# by the name search above (comment it out to use the search result instead)
#start_nodes = ['16726785X']
start_nodes = ['269265910']
#print(start_nodes)
# Use drawing method: uncomment exactly one of the calls below
#draw_local(start_nodes[0], mapping)
draw_svg(start_nodes[0], mapping)
#draw_png(start_nodes[0], mapping)
#draw_dot(start_nodes[0], mapping)