-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgephi_input.py
35 lines (27 loc) · 1.06 KB
/
gephi_input.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from semantometrics import Similarity
import glob
import os
import csv
def main():
    """Retrieve the corpus files and create node/edge input files for Gephi.

    Collects every path matching ``corpus/*.txt``, computes the pairwise
    similarity between the files with
    ``Similarity(files).pairwise_similarity()`` and writes two CSV files
    into the current working directory:

    * ``nodes.csv`` -- header ``Nodes, Id, Label`` followed by one row per
      file: the file's base name, its 0-based index in the sorted file
      list, and the base name again as the label.
    * ``edges.csv`` -- header ``Source, Target, Type, Id, Weight`` followed
      by one ``Undirected`` row per unordered pair ``(i, j)`` with
      ``i < j``. Edge ids start at 1; the weight is ``ps[i, j]`` rounded
      to four decimal places.

    Existing ``nodes.csv`` / ``edges.csv`` files are overwritten.
    """
    # Local import: only needed to pick csv-compatible open() arguments.
    import sys

    # glob returns paths in arbitrary order; sort so that node and edge ids
    # are reproducible between runs. The same sorted list is handed to
    # Similarity, so index i below refers to the same file in both places.
    files = sorted(glob.glob('corpus/*.txt'))

    # Calculate pairwise similarity between files.
    # NOTE(review): ps is indexed as ps[i, j] below, so it is presumably a
    # 2-D matrix-like object aligned with the order of `files` -- confirm
    # against semantometrics.Similarity.
    s = Similarity(files)
    ps = s.pairwise_similarity()

    # The csv module writes text on Python 3 and needs newline='' so it can
    # control line endings itself; opening with 'wb' there raises TypeError
    # on the first writerow(). Python 2's csv module expects binary mode.
    if sys.version_info[0] >= 3:
        open_kwargs = {'mode': 'w', 'newline': '', 'encoding': 'utf-8'}
    else:
        open_kwargs = {'mode': 'wb'}

    # Write output
    with open('nodes.csv', **open_kwargs) as node_file, \
            open('edges.csv', **open_kwargs) as edge_file:
        node_writer = csv.writer(node_file)
        node_writer.writerow(['Nodes', 'Id', 'Label'])
        edge_writer = csv.writer(edge_file)
        edge_writer.writerow(['Source', 'Target', 'Type', 'Id', 'Weight'])

        id_counter = 0
        for i, f in enumerate(files):
            filename = os.path.basename(f)
            node_writer.writerow([filename, i, filename])
            # Only pairs with j > i: each undirected edge is emitted once
            # and self-loops are skipped.
            for j in range(i + 1, len(files)):
                id_counter += 1
                cossim = round(ps[i, j], 4)
                edge_writer.writerow(
                    [i, j, 'Undirected', id_counter, cossim])
# Run the export only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()