# dnmapper/backend/crawler.py
#
# 193 lines
# 6.0 KiB
# Python

import json
import logging
import socket
import time
from django.utils import timezone
import networkx as nx
from backend import gobgp, cmk_parser
from backend.exceptions import RouterParserException
from bgpdata.models import ConfigHost, ASLastSeen, ASLastSeenNeighbor, CrawlRun
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
# Log line layout: timestamp (left-aligned, at least 15 characters wide)
# followed by the message text.
FORMAT = '%(asctime)-15s %(message)s'
# NOTE: this runs at import time, so importing this module configures the
# root logger as a side effect.
logging.basicConfig(level=logging.INFO, format=FORMAT)
# Make sure INFO messages of this module's logger are emitted.
log.setLevel(logging.INFO)
def crawl():
    """Run one crawl and persist its results.

    Builds the current network graph with ``get_current_network()``, saves it
    as a new ``CrawlRun`` and then brings the ``ASLastSeen`` /
    ``ASLastSeenNeighbor`` bookkeeping in line with that graph:

    * every node of the graph gets (or keeps) an ``ASLastSeen`` row whose
      ``online`` / ``directlyCrawled`` flags mirror the node attributes;
    * for online nodes ``lastSeen`` and ``crawlLastSeen`` are refreshed and
      the stored neighbor rows are synchronised with the graph's neighbors.
    """
    net = get_current_network()
    # Named crawl_run so the local does not shadow this function's own name.
    crawl_run = make_crawl_from_net(net)
    crawl_run.save()

    # handle last seen ASNs
    log.info("Updating last seen info")
    last_seen = {asls.asn: asls for asls in ASLastSeen.objects.all()}

    for asn, node in net.nodes(data=True):
        asls = last_seen.get(asn)
        if asls is None:
            asls = last_seen[asn] = ASLastSeen(asn=asn)

        asls.online = node['online']
        if node['online']:
            asls.directlyCrawled = node['directly_crawled']
            asls.lastSeen = timezone.now()
            asls.crawlLastSeen = crawl_run
        else:
            asls.directlyCrawled = False
        asls.save()

        # Neighbor information is only refreshed for nodes seen online;
        # offline nodes keep whatever neighbors were stored last.
        if not asls.online:
            continue

        # Graph.neighbors() yields a one-shot iterator. It is tested for
        # membership AND iterated below, so it has to be materialised first.
        neighs = set(net.neighbors(asn))

        # ASNs of neighbors that already have exactly one row in the database.
        db_neighs = set()
        for db_neigh in asls.aslastseenneighbor_set.all():
            if db_neigh.asn in neighs and db_neigh.asn not in db_neighs:
                db_neighs.add(db_neigh.asn)
            else:
                # Either no longer a neighbor, or a duplicate row for a
                # neighbor that is already accounted for.
                db_neigh.delete()

        for neigh in neighs - db_neighs:
            ASLastSeenNeighbor(asn=neigh, neighbor=asls).save()

    log.info("Automated crawl done")
def make_crawl_from_net(net):
    """Build a CrawlRun summarising *net* without saving it.

    The returned object carries the current time as start time, the graph
    serialised by ``net_to_json()`` and the total / online / offline node
    counts plus the number of edges. Saving is left to the caller.
    """
    # One truth value per node; summing them counts the online nodes.
    online_flags = [bool(attrs['online']) for _, attrs in net.nodes(data=True)]
    total = len(online_flags)
    online = sum(online_flags)

    run = CrawlRun()
    run.startTime = timezone.now()
    run.graph = net_to_json(net)
    run.asCount = total
    run.asOnlineCount = online
    run.asOfflineCount = total - online
    run.peeringCount = len(net.edges)
    return run
def get_current_network():
    """Crawl all configured hosts and return the resulting network graph.

    Every ``ConfigHost`` is queried according to its ``checkMethod`` ('CMK'
    or 'GOBGP') and the returned data is merged into one ``networkx.Graph``.
    Hosts with any other check method are skipped. A host that raises
    ``RouterParserException`` or ``socket.error`` is logged and skipped.

    Afterwards, ASes known from earlier crawls (``ASLastSeen``) that are
    missing from this crawl but have at least one stored neighbor in the
    graph are added as offline nodes, together with any of their stored
    neighbors that are missing as well. Finally every edge receives a
    numeric ``id`` attribute.

    Returns:
        The populated ``networkx.Graph``.
    """
    net = nx.Graph()

    crawl_start = time.time()
    log.info("Crawl run started")
    for host in ConfigHost.objects.all():
        try:
            if host.checkMethod == 'CMK':
                data = cmk_parser.getBGPData(host.ip, host.number)
                _add_data_to_net(net, data)
            elif host.checkMethod == 'GOBGP':
                for entry in gobgp.get_bgp_data(host.ip):
                    _add_data_to_net(net, entry)
        except (RouterParserException, socket.error):
            # A single unreachable or unparsable host must not abort the run.
            log.exception(
                "Could not get data from host %s method %s",
                host, host.checkMethod,
            )

    log.info("Adding last seen neighbor info")
    for asls in ASLastSeen.objects.all():
        if asls.asn in net.nodes:
            continue

        # Evaluate the related queryset once; it is needed twice below.
        known_neighs = list(asls.aslastseenneighbor_set.all())
        if not any(neigh.asn in net.nodes for neigh in known_neighs):
            continue

        _populate_node(net, asls.asn)
        net.nodes[asls.asn]['online'] = False
        for neigh in known_neighs:
            if neigh.asn not in net.nodes:
                # This neighbor was not seen in the current crawl either, so
                # it must not keep _populate_node()'s default of online=True.
                _populate_node(net, neigh.asn)
                net.nodes[neigh.asn]['online'] = False

    log.info("Crawl done in %.2fs", time.time() - crawl_start)
    log.info("%d nodes, %d edges", len(net.nodes), len(net.edges))

    # add id to edges
    for edge_id, (_, _, edge_data) in enumerate(net.edges(data=True)):
        edge_data['id'] = edge_id

    return net
def net_to_json(net):
    """Dump net to JSON; all set-valued node attributes are replaced by lists.

    The replacement happens in place, i.e. the node attribute dicts of the
    graph passed in are modified. The graph is serialised in networkx'
    node-link format.
    """
    for attrs in net.nodes.values():
        # Collect the keys first, then swap each set for a list so the
        # attribute dict can be dumped by the json module.
        set_keys = [name for name, value in attrs.items() if isinstance(value, set)]
        for name in set_keys:
            attrs[name] = list(attrs[name])

    graph_data = nx.readwrite.json_graph.node_link_data(net)
    return json.dumps(graph_data)
def _populate_node(net, asn):
    """Ensure node *asn* exists in *net* with all default attributes set.

    Attributes that are already present are left untouched, so calling this
    for an existing node is harmless. Returns the node's attribute dict.
    """
    net.add_node(asn)
    attrs = net.nodes[asn]

    # Collection-valued attributes start out as empty sets.
    for key in ("prefixes", "router_ids", "routing_table"):
        if key not in attrs:
            attrs[key] = set()

    # Flags: a new node counts as online but not directly crawled.
    for key, default in (("directly_crawled", False), ("online", True)):
        if key not in attrs:
            attrs[key] = default

    return attrs
def _add_data_to_net(net, data):
    """Merge the BGP data of one crawled router into the graph *net*.

    Reads ``data['local_as']``, ``data['local_id']`` and ``data['routes']``;
    each route is expected to provide ``'prefix'`` and ``'path'``.

    * The local AS is marked as directly crawled and gets the router id.
    * Every route is recorded in the local AS' routing table.
    * The first AS of each non-empty path gets the route's prefix.
    * Consecutive, distinct ASes of a path are connected by an edge
      (repeated ASes, e.g. from path prepending, do not create self-loops).
    """
    local_node = _populate_node(net, data['local_as'])
    local_node['router_ids'].add(data['local_id'])
    local_node['directly_crawled'] = True

    for route in data['routes']:
        as_path = route['path']
        local_node['routing_table'].add((route['prefix'], tuple(as_path)))

        if not as_path:
            continue

        # NOTE(review): the prefix is credited to the FIRST AS of the path
        # here, while convert_crawl() credits the LAST one. Which end is the
        # origin depends on the path order the parsers deliver - confirm.
        orig_node = _populate_node(net, as_path[0])
        orig_node['prefixes'].add(route['prefix'])

        for left, right in zip(as_path, as_path[1:]):
            if left == right:
                continue
            # add_edge() alone would create the node without the default
            # attributes, so make sure it is populated first.
            _populate_node(net, right)
            net.add_edge(left, right)
def convert_crawl(crawl):
    """Rebuild the network graph of a stored crawl and print it as JSON.

    The graph is assembled from the crawl's related objects: ``as_set`` for
    the nodes (with their border routers), ``getPeerings()`` for the edges
    and ``announcement_set`` for routing tables and announced prefixes.
    The result of ``net_to_json()`` is written to stdout; nothing is
    returned.
    """
    net = nx.Graph()
    # Evaluate once; the AS list is walked twice below.
    as_entries = list(crawl.as_set.all())

    # First pass: create all nodes, so the second pass can rely on them.
    for asn in as_entries:
        node = _populate_node(net, asn.number)
        node['online'] = asn.online
        node['directly_crawled'] = asn.directlyCrawled
        for br in asn.borderrouter_set.all():
            node['router_ids'].add(br.routerID)

    # Second pass: edges, routing tables and prefixes.
    for asn in as_entries:
        for peering in asn.getPeerings():
            net.add_edge(peering.as1.number, peering.as2.number)

        for ann in asn.announcement_set.all():
            prefix = "{}/{}".format(ann.ip, ann.prefix)
            path = tuple(int(hop) for hop in ann.ASPath.split())
            net.nodes[asn.number]['routing_table'].add((prefix, path))

            # An empty AS path has no last hop to credit the prefix to;
            # skip it instead of failing with an IndexError.
            if not path:
                continue
            net.nodes[path[-1]]['prefixes'].add(prefix)

    print(net_to_json(net))