Skip to content

Commit

Permalink
Issue InternetHealthReport#116 Include layer information to stanford.asdb AS categories (InternetHealthReport#126)
Browse files Browse the repository at this point in the history

* Issue InternetHealthReport#116 Include layer information to stanford.asdb AS categories
  • Loading branch information
JustinLoye authored Feb 16, 2024
1 parent 5abddd7 commit 236f736
Showing 1 changed file with 45 additions and 8 deletions.
53 changes: 45 additions & 8 deletions iyp/crawlers/stanford/asdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ def run(self):
if req.status_code != 200:
raise RequestStatusError('Error while fetching ASdb')

lines = []
lines = set()
asns = set()
categories = set()

# Collect all ASNs and names
# Collect all ASNs, categories, layers, and PART_OF layer hierarchy
part_of_lines = set()
for line in csv.reader(req.text.splitlines(), quotechar='"', delimiter=',', skipinitialspace=True):
if not line:
continue
Expand All @@ -53,25 +54,61 @@ def run(self):

asn = int(line[0][2:])
cats = line[1:]
for category in cats:
if category:
asns.add(asn)
for i, category in enumerate(cats):
if not category:
continue

# Get layer 1 entry
if i % 2 == 0:
layer = 1
categories.add(category)
asns.add(asn)
lines.add((asn, layer, category))

# Get layer 2 entry
else:
parent_category = cats[i - 1]
if not parent_category:
continue

# Remove 'Other' subcategories
# Only store their parent category
if category == 'Other' or category == 'other':
continue

# Handle PART_OF layer hierarchy
part_of_lines.add((category, parent_category))

lines.append([asn, category])
layer = 2
categories.add(category)
asns.add(asn)
lines.add((asn, layer, category))

# Get the node IDs of the ASes and of the category tags
asn_id = self.iyp.batch_get_nodes_by_single_prop('AS', 'asn', asns)
category_id = self.iyp.batch_get_nodes_by_single_prop('Tag', 'label', categories)

# Compute PART_OF links
part_of_links = []
for (subcat, cat) in part_of_lines:

subcat_qid = category_id[subcat]
cat_qid = category_id[cat]

part_of_links.append({'src_id': subcat_qid, 'dst_id': cat_qid,
'props': [self.reference]})

self.iyp.batch_add_links('PART_OF', part_of_links)

# Compute links
links = []
for (asn, category) in lines:
for (asn, layer, category) in lines:

asn_qid = asn_id[asn]
category_qid = category_id[category]

links.append({'src_id': asn_qid, 'dst_id': category_qid, 'props': [self.reference]}) # Set AS category
links.append({'src_id': asn_qid, 'dst_id': category_qid,
'props': [self.reference, {'layer': layer}]}) # Set AS category

# Push all links to IYP
self.iyp.batch_add_links('CATEGORIZED', links)
Expand Down

0 comments on commit 236f736

Please sign in to comment.