New crawler, new gobgp-based backend; crawls are now based on networkx
parent 87642cc4d9
commit 285ee74560
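
In rough terms, the new crawler builds an undirected networkx graph keyed by ASN and serializes it into each CrawlRun. A sketch of the per-node attributes, as initialized by _populate_node() in the diff below (the inline comments are informed guesses, not part of the commit):

    node = {
        "prefixes": set(),          # prefixes originated by this AS
        "router_ids": set(),        # router IDs observed for this AS
        "routing_table": set(),     # (prefix, as_path_tuple) entries
        "directly_crawled": False,  # True once a configured host reports this AS
        "online": True,             # False for ASes only known from last-seen data
    }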
@@ -0,0 +1,192 @@
import json
import logging
import socket
import time

from django.utils import timezone
import networkx as nx

from backend import gobgp, cmk_parser
from backend.exceptions import RouterParserException
from bgpdata.models import ConfigHost, ASLastSeen, ASLastSeenNeighbor, CrawlRun

log = logging.getLogger(__name__)
FORMAT = '%(asctime)-15s %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
log.setLevel(logging.INFO)


def crawl():
    net = get_current_network()
    crawl_run = make_crawl_from_net(net)
    crawl_run.save()

    # handle last seen ASNs
    log.info("Updating last seen info")
    last_seen = {}
    for asls in ASLastSeen.objects.all():
        last_seen[asls.asn] = asls

    for asn, node in net.nodes(data=True):
        if asn not in last_seen:
            last_seen[asn] = ASLastSeen(asn=asn)
        asls = last_seen[asn]
        asls.online = node['online']
        if node['online']:
            asls.directlyCrawled = node['directly_crawled']
            asls.lastSeen = timezone.now()
            asls.crawlLastSeen = crawl_run
        else:
            asls.directlyCrawled = False
        asls.save()

        if asls.online:
            # net.neighbors() returns an iterator; materialize it so the
            # membership tests below don't exhaust it on first use
            neighs = set(net.neighbors(asn))
            db_neighs = set()
            for db_neigh in asls.aslastseenneighbor_set.all():
                if db_neigh.asn in neighs:
                    db_neighs.add(db_neigh.asn)
                else:
                    db_neigh.delete()
            for neigh in neighs:
                if neigh not in db_neighs:
                    asneigh = ASLastSeenNeighbor(asn=neigh, neighbor=asls)
                    asneigh.save()
                    db_neighs.add(neigh)
    log.info("Automated crawl done")


def make_crawl_from_net(net):
    """Create a CrawlRun, but don't save it"""
    asCount = asOnlineCount = asOfflineCount = 0
    for asn, node in net.nodes(data=True):
        asCount += 1
        if node['online']:
            asOnlineCount += 1
        else:
            asOfflineCount += 1

    crawl = CrawlRun()
    crawl.startTime = timezone.now()
    crawl.graph = net_to_json(net)
    crawl.asCount = asCount
    crawl.asOnlineCount = asOnlineCount
    crawl.asOfflineCount = asOfflineCount
    crawl.peeringCount = len(net.edges)

    return crawl


def get_current_network():
    net = nx.Graph()

    crawl_start = time.time()
    log.info("Crawl run started")
    for host in ConfigHost.objects.all():
        try:
            if host.checkMethod == 'CMK':
                data = cmk_parser.getBGPData(host.ip, host.number)
                _add_data_to_net(net, data)
            elif host.checkMethod == 'GOBGP':
                for entry in gobgp.get_bgp_data(host.ip):
                    _add_data_to_net(net, entry)
        except (RouterParserException, socket.error):
            log.exception("Could not get data from host %s method %s", host, host.checkMethod)
            continue

    log.info("Adding last seen neighbor info")
    for asls in ASLastSeen.objects.all():
        if asls.asn not in net.nodes:
            if any(neigh.asn in net.nodes for neigh in asls.aslastseenneighbor_set.all()):
                _populate_node(net, asls.asn)
                net.nodes[asls.asn]['online'] = False
                for neigh in asls.aslastseenneighbor_set.all():
                    if neigh.asn not in net.nodes:
                        _populate_node(net, neigh.asn)
                        # mark the neighbor itself offline, not asls.asn again
                        net.nodes[neigh.asn]['online'] = False

    log.info("Crawl done in %.2fs", time.time() - crawl_start)
    log.info("%s nodes, %s edges", len(net.nodes), len(net.edges))

    # number the edges so they can be referenced by id
    for n, (_, _, data) in enumerate(net.edges(data=True)):
        data['id'] = n

    return net


def net_to_json(net):
    """Dump net to JSON; replaces all sets in the graph with lists"""
    # replace all sets with lists so the graph is JSON-serializable
    for node in net.nodes.values():
        for key, val in node.items():
            if isinstance(val, set):
                node[key] = list(val)

    return json.dumps(nx.readwrite.json_graph.node_link_data(net))


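# For reference: CrawlRun.graph stores the node-link JSON produced above, so
# it can be rebuilt into a graph for later analysis. A minimal sketch;
# json_to_net is a hypothetical helper, not part of this commit (note that
# the sets converted by net_to_json come back as lists):
def json_to_net(graph_json):
    return nx.readwrite.json_graph.node_link_graph(json.loads(graph_json))

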
def _populate_node(net, asn):
    # add_node() leaves existing nodes untouched, so this works as a
    # fetch-or-create for a node's attribute dict
    net.add_node(asn)
    node = net.nodes[asn]
    node.setdefault("prefixes", set())
    node.setdefault("router_ids", set())
    node.setdefault("routing_table", set())
    node.setdefault("directly_crawled", False)
    node.setdefault("online", True)
    return node


def _add_data_to_net(net, data):
    asn = data['local_as']
    as_node = _populate_node(net, asn)
    as_node['router_ids'].add(data['local_id'])
    as_node['directly_crawled'] = True

    # data['peers'] is not used yet

    for route in data['routes']:
        as_node['routing_table'].add((route['prefix'], tuple(route['path'])))

        as_path = route['path']
        if not as_path:
            continue

        orig_node = _populate_node(net, as_path[0])
        orig_node['prefixes'].add(route['prefix'])

        for n in range(len(as_path) - 1):
            # skip repeated ASNs (path prepending)
            if as_path[n] != as_path[n + 1]:
                if as_path[n + 1] not in net.nodes:
                    _populate_node(net, as_path[n + 1])
                net.add_edge(as_path[n], as_path[n + 1])


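# A minimal, hypothetical example of the input shape _add_data_to_net
# expects, inferred from the keys accessed above (all values made up).
# Note that as_path[0] is credited with the prefix, so the parsers
# presumably deliver paths origin-first:
#
#     data = {
#         'local_as': 64512,
#         'local_id': '192.0.2.1',
#         'peers': [],  # currently unused, see above
#         'routes': [
#             {'prefix': '198.51.100.0/24', 'path': [64515, 64514, 64513]},
#         ],
#     }
#
# _add_data_to_net(nx.Graph(), data) would populate nodes 64512-64515,
# credit 64515 with the prefix, and add the edges 64515-64514 and
# 64514-64513 along the AS path.

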
def convert_crawl(crawl):
    net = nx.Graph()

    for asn in crawl.as_set.all():
        if asn.number not in net.nodes:
            _populate_node(net, asn.number)
        d = net.nodes[asn.number]
        d['online'] = asn.online
        d['directly_crawled'] = asn.directlyCrawled
        for br in asn.borderrouter_set.all():
            d['router_ids'].add(br.routerID)

    for asn in crawl.as_set.all():
        for peering in asn.getPeerings():
            net.add_edge(peering.as1.number, peering.as2.number)

        for ann in asn.announcement_set.all():
            prefix = "{}/{}".format(ann.ip, ann.prefix)
            path = list(map(int, ann.ASPath.split()))
            # guard against empty AS paths
            if not path:
                continue
            net.nodes[asn.number]['routing_table'].add((prefix, tuple(path)))
            # the origin AS may not be part of crawl.as_set
            if path[-1] not in net.nodes:
                _populate_node(net, path[-1])
            net.nodes[path[-1]]['prefixes'].add(prefix)

    print(net_to_json(net))
@@ -0,0 +1,2 @@
class RouterParserException(Exception):
    pass
@@ -0,0 +1,28 @@
#!/usr/bin/env python
import argparse
import os
import sys

sys.path.append("..")
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dnmapper.settings")
import django
django.setup()

from backend.crawler import convert_crawl
from bgpdata.models import CrawlRun


def main():
    parser = argparse.ArgumentParser()
    # required, so a missing id fails with a clear argparse error
    # instead of looking up pk=None
    parser.add_argument("-c", "--crawl-id", type=int, required=True)
    args = parser.parse_args()

    try:
        crawl = CrawlRun.objects.get(pk=args.crawl_id)
    except CrawlRun.DoesNotExist:
        parser.error("CrawlRun with id {} does not exist".format(args.crawl_id))

    convert_crawl(crawl)


if __name__ == '__main__':
    main()
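
# Usage sketch (the script's filename is not shown in this diff; the
# crawl id is made up):
#
#     ./convert_crawl.py --crawl-id 1
#
# This prints the selected CrawlRun's graph as node-link JSON via
# convert_crawl() above.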
@@ -0,0 +1,17 @@
#!/usr/bin/env python
import os
import sys

sys.path.append("..")
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dnmapper.settings")
import django
django.setup()

from backend.crawler import crawl


def main():
    crawl()


if __name__ == '__main__':
    main()