WIP: Rework complete backend

New crawler and new gobgp-based backend; crawls are now based on networkx.
This commit is contained in:
Sebastian Lohff 2020-06-06 17:46:08 +02:00
parent 87642cc4d9
commit 285ee74560
11 changed files with 325 additions and 24 deletions

View File

@ -4,13 +4,12 @@
# Written by Sebastian Lohff (seba@someserver.de)
from __future__ import print_function
from collections import OrderedDict
import re
import socket
from collections import OrderedDict
from backend.exceptions import RouterParserException
class RouterParserException(Exception):
def err(msg):
raise RouterParserException(msg)
@ -27,6 +26,7 @@ def getBGPData(ip, asno):
return router
def getDataFromHost(ip):
x = socket.socket()
@ -184,9 +184,9 @@ def _birdFindRoutes(info):
for key in ["path", "nexthop", "network", "iBGP"]:
if key not in candidate:
route = {"prefix": candidate["network"], "nexthop": candidate["nexthop"], "path": candidate["path"], "iBGP": candidate["iBGP"]}
route = {"prefix": candidate["network"], "nexthop": candidate["nexthop"],
"path": list(map(int, candidate["path"])), "iBGP": candidate["iBGP"]}
routes = []
candidate = None
@ -347,7 +347,8 @@ def _quaggaFindRoutes(raw):
# currently skip incomplete routes
if '?' not in path:
route = {"prefix": d["network"], "nexthop": d["nexthop"], "path": path, "iBGP": d["origin"] == "i"}
route = {"prefix": d["network"], "nexthop": d["nexthop"],
"path": list(map(int, path)), "iBGP": d["origin"] == "i"}
return routes

backend/crawler.py Normal file
View File

@ -0,0 +1,192 @@
import json
import logging
import socket
import time
from django.utils import timezone
import networkx as nx
from backend import gobgp, cmk_parser
from backend.exceptions import RouterParserException
from bgpdata.models import ConfigHost, ASLastSeen, ASLastSeenNeighbor, CrawlRun
log = logging.getLogger(__name__)
FORMAT = '%(asctime)-15s %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
def crawl():
net = get_current_network()
crawl = make_crawl_from_net(net)
# handle last seen ASNs
log.info("Updating last seen info")
last_seen = {}
for asls in ASLastSeen.objects.all():
last_seen[asls.asn] = asls
for asn, node in net.nodes(data=True):
if asn not in last_seen:
last_seen[asn] = ASLastSeen(asn=asn)
asls = last_seen[asn]
# print(asn, node)
asls.online = node['online']
if node['online']:
asls.directlyCrawled = node['directly_crawled']
asls.lastSeen = timezone.now()
asls.crawlLastSeen = crawl
asls.directlyCrawled = False
if asls.online:
neighs = net.neighbors(asn)
db_neighs = set()
for db_neigh in asls.aslastseenneighbor_set.all():
if db_neigh.asn in neighs:
for neigh in neighs:
if neigh not in db_neighs:
asneigh = ASLastSeenNeighbor(asn=neigh, neighbor=asls)
log.info("Automated crawl done")
def make_crawl_from_net(net):
"""Create a CrawlRun, but don't save it"""
# `net` is the graph whose nodes carry an 'online' attribute; the returned
# CrawlRun gets its counters and serialized graph from it.
asCount = asOnlineCount = asOfflineCount = 0
# tally total / online / offline ASes from the node attributes
for asn, node in net.nodes(data=True):
asCount += 1
if node['online']:
asOnlineCount += 1
# NOTE(review): presumably guarded by an `else:` that is missing from this
# rendering of the diff - confirm against the real file
asOfflineCount += 1
crawl = CrawlRun()
crawl.startTime = timezone.now()
# net_to_json() replaces set-valued node attributes with lists in place,
# so `net` itself is modified by this call
crawl.graph = net_to_json(net)
crawl.asCount = asCount
crawl.asOnlineCount = asOnlineCount
crawl.asOfflineCount = asOfflineCount
# one peering per edge of the graph
crawl.peeringCount = len(net.edges)
return crawl
def get_current_network():
# Build and return a networkx Graph from every configured host, then add
# nodes known only from stored ASLastSeen data and number the edges.
# NOTE(review): indentation is absent and a `try:` matching the `except`
# below is not visible in this rendering - confirm the exact control flow.
net = nx.Graph()
crawl_start = time.time()
log.info("Crawl run started")
for host in ConfigHost.objects.all():
# CMK yields a single data dict per host, GOBGP an iterable of them
if host.checkMethod == 'CMK':
data = cmk_parser.getBGPData(host.ip, host.number)
_add_data_to_net(net, data)
elif host.checkMethod == 'GOBGP':
for entry in gobgp.get_bgp_data(host.ip):
_add_data_to_net(net, entry)
# a failing host is logged with its traceback
except (RouterParserException, socket.error):
log.exception("Could not get data from host %s method %s", host, host.checkMethod)
log.info("Adding last seen neighbor info")
for asls in ASLastSeen.objects.all():
# ASes absent from this crawl are added as offline nodes when at least one
# of their stored neighbors is part of the current graph
if asls.asn not in net.nodes:
if any(neigh.asn in net.nodes for neigh in asls.aslastseenneighbor_set.all()):
_populate_node(net, asls.asn)
net.nodes[asls.asn]['online'] = False
for neigh in asls.aslastseenneighbor_set.all():
if neigh.asn not in net.nodes:
_populate_node(net, neigh.asn)
# NOTE(review): this sets the flag on asls.asn again rather than on the
# just-populated neigh.asn, which keeps the default online=True -
# possibly a copy/paste slip, confirm the intent
net.nodes[asls.asn]['online'] = False
log.info("Crawl done in %.2fs", time.time() - crawl_start)
print("{} nodes, {} edges".format(len(net.nodes), len(net.edges)))
# add id to edges
for n, (_, _, data) in enumerate(net.edges(data=True)):
data['id'] = n
# import IPython
# IPython.embed()
return net
def net_to_json(net):
"""Dump net to JSON; all sets in the graph's node data are replaced by lists"""
# replace all sets with lists for better dumpability
# (this happens in place: the attribute dicts of `net` are modified)
for node in net.nodes.values():
for key, val in node.items():
if isinstance(val, set):
node[key] = list(val)
# serialize the node-link representation of the graph to a JSON string
return json.dumps(nx.readwrite.json_graph.node_link_data(net))
def _populate_node(net, asn):
# Give the attribute dict of node `asn` the default keys used by the crawler
# and return that dict. setdefault() keeps any value already present.
# NOTE(review): net.nodes[asn] raises KeyError for a node that is not in the
# graph, yet callers invoke this for ASNs they just found missing from
# net.nodes - presumably a `net.add_node(asn)` line preceded this lookup and
# is missing from this rendering; confirm against the real file.
node = net.nodes[asn]
node.setdefault("prefixes", set())
node.setdefault("router_ids", set())
node.setdefault("routing_table", set())
node.setdefault("directly_crawled", False)
node.setdefault("online", True)
return node
def _add_data_to_net(net, data):
# Merge one router's BGP data into the graph. `data` is read via the keys
# 'local_as', 'peers' and 'routes'; each route via 'prefix' and 'path'.
# NOTE(review): indentation is absent and some lines appear to be missing in
# this rendering (the body of the peers loop, and whatever follows
# `if not as_path:`) - confirm against the real file.
asn = data['local_as']
as_node = _populate_node(net, asn)
# the AS we fetched data from counts as directly crawled
as_node['directly_crawled'] = True
for peer in data['peers']:
for route in data['routes']:
# stored as a hashable (prefix, path-tuple) pair in the node's routing_table set
as_node['routing_table'].add((route['prefix'], tuple(route['path'])))
as_path = route['path']
if not as_path:
orig_node = _populate_node(net, as_path[0])
# walk the path pairwise; consecutive duplicate entries are skipped so no
# self-edges are added
for n in range(len(as_path) - 1):
if as_path[n] != as_path[n + 1]:
if as_path[n + 1] not in net.nodes:
_populate_node(net, as_path[n + 1])
net.add_edge(as_path[n], as_path[n + 1])
def convert_crawl(crawl):
# Rebuild a networkx Graph from the AS, peering and announcement rows
# attached to an existing CrawlRun.
# NOTE(review): indentation is absent, the body of the borderrouter loop and
# any return/save statement are not visible in this rendering - confirm
# against the real file.
net = nx.Graph()
# first pass: one node per AS of the crawl, carrying its stored flags
for asn in crawl.as_set.all():
if asn.number not in net.nodes:
_populate_node(net, asn.number)
d = net.nodes[asn.number]
d['online'] = asn.online
d['directly_crawled'] = asn.directlyCrawled
for br in asn.borderrouter_set.all():
# second pass: edges from peerings, routing table entries from announcements
for asn in crawl.as_set.all():
for peering in asn.getPeerings():
net.add_edge(peering.as1.number, peering.as2.number)
for ann in asn.announcement_set.all():
prefix = "{}/{}".format(ann.ip, ann.prefix)
# ASPath is split on whitespace and each element converted to int
path = list(map(int, ann.ASPath.split()))
net.nodes[asn.number]['routing_table'].add((prefix, tuple(path)))

backend/exceptions.py Normal file
View File

@ -0,0 +1,2 @@
class RouterParserException(Exception):

View File

@ -3,7 +3,7 @@
# Written by Sebastian Lohff (seba@someserver.de)
from django.contrib import admin
from bgpdata.models import ConfigHost, CrawlRun, CrawlLog, AS, BorderRouter, Announcement, Peering, BorderRouterPair
from bgpdata.models import ConfigHost, CrawlRun, CrawlLog, AS, BorderRouter, Announcement, Peering, BorderRouterPair, ASLastSeen, ASLastSeenNeighbor
# Register your models here.
@ -14,3 +14,5 @@ admin.site.register(BorderRouter)

View File

@ -6,8 +6,10 @@ from tastypie.resources import ModelResource, ALL_WITH_RELATIONS, ALL
from tastypie import fields
from bgpdata.models import AS, CrawlRun, Announcement, BorderRouter
class ASResource(ModelResource):
crawl = fields.ForeignKey("bgpdata.api.CrawlResource", "crawl")
class Meta:
list_allowed_methods = ['get']
detail_allowed_methods = ['get']
@ -16,13 +18,23 @@ class ASResource(ModelResource):
queryset = AS.objects.all()
resource_name = "as"
class CrawlResource(ModelResource):
# tastypie resource over all CrawlRun objects, published as "crawl"
class Meta:
queryset = CrawlRun.objects.all()
resource_name = "crawl"
# the `graph` field is left out of this resource
excludes = ["graph"]
class CrawlGraphResource(ModelResource):
# Second tastypie resource over the same CrawlRun queryset, published as
# "crawl_graph"; no `excludes` is declared here, so `graph` is not filtered out
class Meta:
queryset = CrawlRun.objects.all()
resource_name = "crawl_graph"
class BorderRouterResource(ModelResource):
AS = fields.ForeignKey("bgpdata.api.ASResource", "AS")
class Meta:
list_allowed_methods = ['get']
detail_allowed_methods = ['get']
@ -31,8 +43,10 @@ class BorderRouterResource(ModelResource):
queryset = BorderRouter.objects.all()
resource_name = "borderrouter"
class AnnouncementResource(ModelResource):
router = fields.ForeignKey("bgpdata.api.BorderRouterResource", "router")
class Meta:
list_allowed_methods = ['get']
detail_allowed_methods = ['get']

View File

@ -23,12 +23,14 @@ class ConfigHost(models.Model):
def __str__(self):
return "%s (%s / %s)" % (self.name, self.number, self.ip)
class CrawlRun(models.Model):
# time start, time end,
# time start, time end,
startTime = models.DateTimeField()
endTime = models.DateTimeField(null=True, blank=True)
hostsCrawled = models.ManyToManyField(ConfigHost, null=True, blank=True)
graph = models.TextField()
asCount = models.IntegerField(default=0)
asOnlineCount = models.IntegerField(default=0)
@ -40,14 +42,16 @@ class CrawlRun(models.Model):
def countAS(self):
return self.asCount
def countASOnline(self):
return self.asOnlineCount
def countASOffline(self):
return self.asOfflineCount
#return self.as_set.filter(online=False).count()
def countPeerings(self):
return self.peeringCount
#return Peering.objects.filter(Q(as1__crawl=self)|Q(as2__crawl=self)).count()
class CrawlLog(models.Model):
@ -83,6 +87,7 @@ class CrawlLog(models.Model):
host = "host %s - " % self.host.name if self.host else ""
return "Log %s %s: %s%s" % (self.get_severity_display(), self.logtime, host, self.message)
class AS(models.Model):
# asno
crawl = models.ForeignKey(CrawlRun)
@ -108,7 +113,7 @@ class AS(models.Model):
def getPeerings(self):
return Peering.objects.filter(Q(as1=self)|Q(as2=self))
return Peering.objects.filter(Q(as1=self) | Q(as2=self))
def getAnnouncedPrefixes(self):
return list(set(map(lambda _x: "%(ip)s/%(prefix)s" % _x, self.announcement_set.all().values('ip', 'prefix'))))
@ -117,12 +122,13 @@ class AS(models.Model):
if self.lastSeen:
return self.lastSeen.startTime.strftime("%d.%m.%Y %H:%I")
class BorderRouter(models.Model):
# as id, ip, check method, pingable, reachable
# unique: (crawl_id, asno, as id)
AS = models.ForeignKey(AS)
routerID = models.GenericIPAddressField()
pingable = models.BooleanField(default=False)
reachable = models.BooleanField(default=False)
@ -131,6 +137,7 @@ class BorderRouter(models.Model):
r = "r" if self.reachable else "!r"
return "Router %s (AS %s, %s%s)" % (self.routerID, self.AS.number, p, r)
class Announcement(models.Model):
router = models.ForeignKey(BorderRouter)
@ -146,6 +153,7 @@ class Announcement(models.Model):
def __str__(self):
return "%s/%s via %s (crawl %s)" % (self.ip, self.prefix, self.ASPath, self.router.AS.crawl.pk)
class Peering(models.Model):
DIRECT = 'direct'
PATH = 'path'
@ -159,12 +167,13 @@ class Peering(models.Model):
('as1', 'as2'),
as1 = models.ForeignKey(AS, related_name='peering1')
as1 = models.ForeignKey(AS, related_name='peering1')
as2 = models.ForeignKey(AS, related_name='peering2')
origin = models.CharField(max_length=10, choices=ORIGIN)
def __str__(self):
return "AS %s <--> AS %s (%s, crawl %s)" % (self.as1.number, self.as2.number, self.get_origin_display(), self.as1.crawl.pk)
return "AS %s <--> AS %s (%s, crawl %s)" % (self.as1.number, self.as2.number,
self.get_origin_display(), self.as1.crawl.pk)
def containsAS(self, AS):
return AS in (self.as1, self.as2)
@ -177,6 +186,7 @@ class Peering(models.Model):
except Peering.DoesNotExist:
return Peering.objects.get(as1=as2, as2=as1)
class BorderRouterPair(models.Model):
peering = models.ForeignKey(Peering)
router1 = models.ForeignKey(BorderRouter, default=None, blank=True, null=True, related_name='routerpair1')
@ -191,3 +201,20 @@ class BorderRouterPair(models.Model):
return BorderRouterPair.objects.get(peering=peering, router1=router1, router2=router2)
except BorderRouterPair.DoesNotExist:
return BorderRouterPair.objects.get(peering=peering, router1=router2, router2=router1)
class ASLastSeen(models.Model):
# One row per AS number (asn is unique and indexed) recording its most
# recently observed state.
asn = models.IntegerField(db_index=True, unique=True)
directlyCrawled = models.BooleanField(default=False)
online = models.BooleanField()
# nullable: may be unset for an AS that has no recorded sighting
lastSeen = models.DateTimeField(blank=True, null=True)
# the CrawlRun this AS was last seen in
crawlLastSeen = models.ForeignKey(CrawlRun)
def __str__(self):
# e.g. "AS<asn> online, last seen <datetime> (crawl <pk>)"
return ("AS{} {}, last seen {} (crawl {})"
.format(self.asn, "online" if self.online else "offline", self.lastSeen, self.crawlLastSeen.pk))
class ASLastSeenNeighbor(models.Model):
# A neighbor entry attached to an ASLastSeen row.
# `asn` is the neighbor's AS number; `neighbor` references the ASLastSeen
# row this entry belongs to (reverse accessor: aslastseenneighbor_set).
asn = models.IntegerField()
neighbor = models.ForeignKey(ASLastSeen)

View File

@ -4,23 +4,25 @@
from django.conf.urls import url, include
from bgpdata.api import ASResource, CrawlResource, BorderRouterResource, AnnouncementResource
from bgpdata.api import ASResource, CrawlResource, CrawlGraphResource, BorderRouterResource, AnnouncementResource
from bgpdata import views as bgpdata_views
asResource = ASResource()
crawlResource = CrawlResource()
crawlGraphResource = CrawlGraphResource()
borderRouterResource = BorderRouterResource()
announcementResource = AnnouncementResource()
urlpatterns = (
url(r'^$', bgpdata_views.overview),
url(r'^([0-9]+)/$', bgpdata_views.showMap),
url(r'^new/([0-9]+|live)/$', bgpdata_views.show_new_map),
#url(r'^api/crawl/(?P<crawlID>\d+)/asses/$', 'bgpdata.api.asses'),
#(r'^api/', include(asResource.urls)),
# url(r'^api/crawl/(?P<crawlID>\d+)/asses/$', 'bgpdata.api.asses'),
# (r'^api/', include(asResource.urls)),
url(r'^api/', include(asResource.urls)),
url(r'^api/', include(crawlResource.urls)),
url(r'^api/', include(crawlGraphResource.urls)),
url(r'^api/', include(borderRouterResource.urls)),
url(r'^api/', include(announcementResource.urls)),

View File

@ -1,16 +1,19 @@
# This file is part of dnmapper, an AS--level mapping tool
# Licensed under GNU General Public License v3 or later
# Written by Sebastian Lohff (seba@someserver.de)
from django.shortcuts import render
from bgpdata.models import CrawlRun, AS, Peering
from django.core.paginator import Paginator
from bgpdata.models import CrawlRun, AS, Peering
from backend import crawler
def overview(request):
crawls = CrawlRun.objects.order_by("-startTime")
crawlsPage = Paginator(crawls, 200)
return render(request, 'bgpdata/overview.html', {"crawls": crawlsPage.page(1)})
def showMap(request, crawlId):
crawl = None
@ -22,3 +25,17 @@ def showMap(request, crawlId):
peerings = Peering.objects.filter(as1__crawl=crawl)
return render(request, 'bgpdata/map.html', {"crawl": crawl, 'ASses': ASses, 'peerings': peerings})
def show_new_map(request, crawl_id):
# View for the new map: renders either a freshly crawled ("live") network
# or a stored CrawlRun looked up by id.
# NOTE(review): indentation is absent and the `else:` / `try:` that would
# separate the live branch from the database lookup are not visible in this
# rendering - confirm against the real file.
crawl = None
if crawl_id == 'live':
# build an unsaved CrawlRun from the current network
net = crawler.get_current_network()
crawl = crawler.make_crawl_from_net(net)
crawl = CrawlRun.objects.get(id=crawl_id)
# unknown crawl id: render the dedicated "no map found" page
except CrawlRun.DoesNotExist:
return render(request, "bgpdata/no-map-found.html", {"crawl_id": crawl_id})
return render(request, 'bgpdata/new_new_map.html', {"crawl": crawl})

bin/conv.py Executable file
View File

@ -0,0 +1,28 @@
#!/usr/bin/env python
import argparse
import os
import sys
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dnmapper.settings")
import django
from backend.crawler import convert_crawl
from bgpdata.models import CrawlRun
def main():
# Command line entry point: parse -c/--crawl-id and load that CrawlRun.
# NOTE(review): indentation is absent, the `try:` matching the `except`
# below and whatever is done with `crawl` afterwards are not visible in this
# rendering - confirm against the real file.
parser = argparse.ArgumentParser()
# NOTE(review): the option is not marked required, so crawl_id may be None
parser.add_argument("-c", "--crawl-id", type=int)
args = parser.parse_args()
crawl = CrawlRun.objects.get(pk=args.crawl_id)
except CrawlRun.DoesNotExist:
# parser.error() prints the message together with usage and exits
parser.error("CrawlRun with id {} does not exist".format(args.crawl_id))
if __name__ == '__main__':

View File

@ -19,8 +19,7 @@ from django.utils import timezone
from django.db.models import Q, Max
from bgpdata.models import ConfigHost, CrawlRun, CrawlLog, AS, BorderRouter, Announcement, Peering, BorderRouterPair
from routerparsers import getBGPData, RouterParserException
from backend import gobgp
from backend import gobgp, cmk_parser
# config
@ -53,13 +52,13 @@ def main():
print(" -- Getting data for host %s" % host)
if host.checkMethod == 'CMK':
data = [getBGPData(host.ip, host.number)]
data = [cmk_parser.getBGPData(host.ip, host.number)]
elif host.checkMethod == 'GOBGP':
data = gobgp.get_bgp_data(host.ip)
CrawlLog.log(crawl, "Method %s is not currently supported, skipping host" % host.checkMethod, host=host, severity=CrawlLog.ERROR)
except RouterParserException as e:
except cmk_parser.RouterParserException as e:
msg = "Could not parse data for host: %s" % str(e)
print("%s: %s" % (host, msg))
CrawlLog.log(crawl, msg, host=host, severity=CrawlLog.ERROR)

bin/netcrawl.py Executable file
View File

@ -0,0 +1,17 @@
#!/usr/bin/env python
import os
import sys
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dnmapper.settings")
import django
from backend.crawler import crawl
def main():
if __name__ == '__main__':