WIP: Rework complete backend

New crawler, new gobgp-based backend; crawls are now built on networkx graphs
Sebastian Lohff 2020-06-06 17:46:08 +02:00
parent 87642cc4d9
commit 285ee74560
11 changed files with 325 additions and 24 deletions
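For orientation, the per-router structure that both the CMK parser and the gobgp backend feed into the networkx graph builder can be read off _add_data_to_net in backend/crawler.py below. A minimal sketch with hypothetical example values (key names taken from the diff):

    # sketch of one parser/gobgp entry consumed by backend/crawler.py
    # (hypothetical values; keys as used by _add_data_to_net)
    entry = {
        "local_as": 64500,            # ASN of the crawled router
        "local_id": "10.0.0.1",       # its router id
        "peers": [],                  # present, but not yet evaluated by the WIP crawler
        "routes": [
            {"prefix": "10.1.0.0/16", "nexthop": "10.0.0.2",
             "path": [64501, 64502], "iBGP": False},
        ],
    }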

backend/cmk_parser.py

@@ -4,13 +4,12 @@
 # Written by Sebastian Lohff (seba@someserver.de)

 from __future__ import print_function

+from collections import OrderedDict
 import re
 import socket
-from collections import OrderedDict

-class RouterParserException(Exception):
-    pass
+from backend.exceptions import RouterParserException


 def err(msg):
     raise RouterParserException(msg)
@@ -27,6 +26,7 @@ def getBGPData(ip, asno):

     return router

+
 def getDataFromHost(ip):
     socket.setdefaulttimeout(5)
     x = socket.socket()
@@ -184,9 +184,9 @@ def _birdFindRoutes(info):
         for key in ["path", "nexthop", "network", "iBGP"]:
             if key not in candidate:
                 return
-        route = {"prefix": candidate["network"], "nexthop": candidate["nexthop"], "path": candidate["path"], "iBGP": candidate["iBGP"]}
+        route = {"prefix": candidate["network"], "nexthop": candidate["nexthop"],
+                 "path": list(map(int, candidate["path"])), "iBGP": candidate["iBGP"]}
         routes.append(route)
-        pass

     routes = []
     candidate = None
@@ -347,7 +347,8 @@ def _quaggaFindRoutes(raw):
             # currently skip incomplete routes
             if '?' not in path:
-                route = {"prefix": d["network"], "nexthop": d["nexthop"], "path": path, "iBGP": d["origin"] == "i"}
+                route = {"prefix": d["network"], "nexthop": d["nexthop"],
+                         "path": list(map(int, path)), "iBGP": d["origin"] == "i"}
                 routes.append(route)

     return routes
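The functional change in both parsers is casting AS paths from strings to ints, presumably because the new crawler keys graph nodes by integer ASN. A quick illustration with made-up values:

    # why the parsers now cast paths to int (hypothetical ASNs)
    path = "64501 64502 64503".split()          # parsers see strings
    assert list(map(int, path)) == [64501, 64502, 64503]
    # without the cast, '64501' and 64501 would become two distinct graph nodes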

backend/crawler.py Normal file (+192)

@@ -0,0 +1,192 @@
import json
import logging
import socket
import time

from django.utils import timezone
import networkx as nx

from backend import gobgp, cmk_parser
from backend.exceptions import RouterParserException
from bgpdata.models import ConfigHost, ASLastSeen, ASLastSeenNeighbor, CrawlRun

log = logging.getLogger(__name__)
FORMAT = '%(asctime)-15s %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
log.setLevel(logging.INFO)


def crawl():
    net = get_current_network()
    crawl = make_crawl_from_net(net)
    crawl.save()

    # handle last seen ASNs
    log.info("Updating last seen info")
    last_seen = {}
    for asls in ASLastSeen.objects.all():
        last_seen[asls.asn] = asls

    for asn, node in net.nodes(data=True):
        if asn not in last_seen:
            last_seen[asn] = ASLastSeen(asn=asn)
        asls = last_seen[asn]
        asls.online = node['online']
        if node['online']:
            asls.directlyCrawled = node['directly_crawled']
            asls.lastSeen = timezone.now()
            asls.crawlLastSeen = crawl
        else:
            asls.directlyCrawled = False
        asls.save()

        if asls.online:
            # neighbors() returns an iterator; materialize it, as it is
            # needed for both passes below
            neighs = list(net.neighbors(asn))
            db_neighs = set()
            for db_neigh in asls.aslastseenneighbor_set.all():
                if db_neigh.asn in neighs:
                    db_neighs.add(db_neigh.asn)
                else:
                    db_neigh.delete()
            for neigh in neighs:
                if neigh not in db_neighs:
                    asneigh = ASLastSeenNeighbor(asn=neigh, neighbor=asls)
                    asneigh.save()
                    db_neighs.add(neigh)

    log.info("Automated crawl done")


def make_crawl_from_net(net):
    """Create a CrawlRun from a graph, but don't save it"""
    asCount = asOnlineCount = asOfflineCount = 0
    for asn, node in net.nodes(data=True):
        asCount += 1
        if node['online']:
            asOnlineCount += 1
        else:
            asOfflineCount += 1

    crawl = CrawlRun()
    crawl.startTime = timezone.now()
    crawl.graph = net_to_json(net)
    crawl.asCount = asCount
    crawl.asOnlineCount = asOnlineCount
    crawl.asOfflineCount = asOfflineCount
    crawl.peeringCount = len(net.edges)

    return crawl


def get_current_network():
    net = nx.Graph()
    crawl_start = time.time()

    log.info("Crawl run started")
    for host in ConfigHost.objects.all():
        try:
            if host.checkMethod == 'CMK':
                data = cmk_parser.getBGPData(host.ip, host.number)
                _add_data_to_net(net, data)
            elif host.checkMethod == 'GOBGP':
                for entry in gobgp.get_bgp_data(host.ip):
                    _add_data_to_net(net, entry)
        except (RouterParserException, socket.error):
            log.exception("Could not get data from host %s method %s", host, host.checkMethod)
            continue

    log.info("Adding last seen neighbor info")
    for asls in ASLastSeen.objects.all():
        if asls.asn not in net.nodes:
            if any(neigh.asn in net.nodes for neigh in asls.aslastseenneighbor_set.all()):
                _populate_node(net, asls.asn)
                net.nodes[asls.asn]['online'] = False
                for neigh in asls.aslastseenneighbor_set.all():
                    if neigh.asn not in net.nodes:
                        _populate_node(net, neigh.asn)
                        net.nodes[neigh.asn]['online'] = False

    log.info("Crawl done in %.2fs", time.time() - crawl_start)
    log.info("%d nodes, %d edges", len(net.nodes), len(net.edges))

    # number the edges so they can be referenced by id
    for n, (_, _, data) in enumerate(net.edges(data=True)):
        data['id'] = n

    return net


def net_to_json(net):
    """Dump net to JSON; replaces all sets in the graph with lists"""
    # replace all sets with lists for better dumpability
    for node in net.nodes.values():
        for key, val in node.items():
            if isinstance(val, set):
                node[key] = list(val)

    return json.dumps(nx.readwrite.json_graph.node_link_data(net))


def _populate_node(net, asn):
    net.add_node(asn)
    node = net.nodes[asn]
    node.setdefault("prefixes", set())
    node.setdefault("router_ids", set())
    node.setdefault("routing_table", set())
    node.setdefault("directly_crawled", False)
    node.setdefault("online", True)
    return node


def _add_data_to_net(net, data):
    asn = data['local_as']
    as_node = _populate_node(net, asn)
    as_node['router_ids'].add(data['local_id'])
    as_node['directly_crawled'] = True

    # TODO: peer data (data['peers']) is not evaluated yet

    for route in data['routes']:
        as_node['routing_table'].add((route['prefix'], tuple(route['path'])))

        as_path = route['path']
        if not as_path:
            continue
        orig_node = _populate_node(net, as_path[0])
        orig_node['prefixes'].add(route['prefix'])

        # add an edge for every AS adjacency on the path (skip prepends)
        for n in range(len(as_path) - 1):
            if as_path[n] != as_path[n + 1]:
                if as_path[n + 1] not in net.nodes:
                    _populate_node(net, as_path[n + 1])
                net.add_edge(as_path[n], as_path[n + 1])


def convert_crawl(crawl):
    net = nx.Graph()

    for asn in crawl.as_set.all():
        if asn.number not in net.nodes:
            _populate_node(net, asn.number)
        d = net.nodes[asn.number]
        d['online'] = asn.online
        d['directly_crawled'] = asn.directlyCrawled
        for br in asn.borderrouter_set.all():
            d['router_ids'].add(br.routerID)

    for asn in crawl.as_set.all():
        for peering in asn.getPeerings():
            net.add_edge(peering.as1.number, peering.as2.number)
        for ann in asn.announcement_set.all():
            prefix = "{}/{}".format(ann.ip, ann.prefix)
            path = list(map(int, ann.ASPath.split()))
            if not path:
                continue
            net.nodes[asn.number]['routing_table'].add((prefix, tuple(path)))
            net.nodes[path[-1]]['prefixes'].add(prefix)

    print(net_to_json(net))
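Since CrawlRun.graph stores the node-link JSON produced by net_to_json(), a stored graph can presumably be rehydrated with networkx's matching loader. A sketch, assuming a saved CrawlRun instance named crawl_run (hypothetical name):

    import json
    import networkx as nx

    # inverse of net_to_json(); note that set-valued node attributes
    # were flattened to lists on dump and stay lists here
    net = nx.readwrite.json_graph.node_link_graph(json.loads(crawl_run.graph))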

backend/exceptions.py Normal file (+2)

@@ -0,0 +1,2 @@
class RouterParserException(Exception):
    pass

bgpdata/admin.py

@@ -3,7 +3,7 @@
 # Written by Sebastian Lohff (seba@someserver.de)

 from django.contrib import admin
-from bgpdata.models import ConfigHost, CrawlRun, CrawlLog, AS, BorderRouter, Announcement, Peering, BorderRouterPair
+from bgpdata.models import ConfigHost, CrawlRun, CrawlLog, AS, BorderRouter, Announcement, Peering, BorderRouterPair, ASLastSeen, ASLastSeenNeighbor

 # Register your models here.
 admin.site.register(ConfigHost)
@@ -14,3 +14,5 @@ admin.site.register(BorderRouter)
 admin.site.register(Announcement)
 admin.site.register(Peering)
 admin.site.register(BorderRouterPair)
+admin.site.register(ASLastSeen)
+admin.site.register(ASLastSeenNeighbor)

bgpdata/api.py

@@ -6,8 +6,10 @@ from tastypie.resources import ModelResource, ALL_WITH_RELATIONS, ALL
 from tastypie import fields
 from bgpdata.models import AS, CrawlRun, Announcement, BorderRouter

+
 class ASResource(ModelResource):
     crawl = fields.ForeignKey("bgpdata.api.CrawlResource", "crawl")
+
     class Meta:
         list_allowed_methods = ['get']
         detail_allowed_methods = ['get']
@@ -16,13 +18,23 @@ class ASResource(ModelResource):
         queryset = AS.objects.all()
         resource_name = "as"

+
 class CrawlResource(ModelResource):
     class Meta:
         queryset = CrawlRun.objects.all()
         resource_name = "crawl"
+        excludes = ["graph"]
+
+
+class CrawlGraphResource(ModelResource):
+    class Meta:
+        queryset = CrawlRun.objects.all()
+        resource_name = "crawl_graph"
+
 class BorderRouterResource(ModelResource):
     AS = fields.ForeignKey("bgpdata.api.ASResource", "AS")
+
     class Meta:
         list_allowed_methods = ['get']
         detail_allowed_methods = ['get']
@@ -31,8 +43,10 @@ class BorderRouterResource(ModelResource):
         queryset = BorderRouter.objects.all()
         resource_name = "borderrouter"

+
 class AnnouncementResource(ModelResource):
     router = fields.ForeignKey("bgpdata.api.BorderRouterResource", "router")
+
     class Meta:
         list_allowed_methods = ['get']
         detail_allowed_methods = ['get']

bgpdata/models.py

@@ -23,12 +23,14 @@ class ConfigHost(models.Model):
     def __str__(self):
         return "%s (%s / %s)" % (self.name, self.number, self.ip)

+
 class CrawlRun(models.Model):
     # time start, time end,
     startTime = models.DateTimeField()
     endTime = models.DateTimeField(null=True, blank=True)
     hostsCrawled = models.ManyToManyField(ConfigHost, null=True, blank=True)
+    graph = models.TextField()

     asCount = models.IntegerField(default=0)
     asOnlineCount = models.IntegerField(default=0)
@@ -40,14 +42,16 @@ class CrawlRun(models.Model):
     def countAS(self):
         return self.asCount

     def countASOnline(self):
         return self.asOnlineCount

     def countASOffline(self):
         return self.asOfflineCount
+        # return self.as_set.filter(online=False).count()

     def countPeerings(self):
         return self.peeringCount
+        # return Peering.objects.filter(Q(as1__crawl=self) | Q(as2__crawl=self)).count()

 class CrawlLog(models.Model):
     INFO = 'INFO'
@@ -83,6 +87,7 @@ class CrawlLog(models.Model):
         host = "host %s - " % self.host.name if self.host else ""
         return "Log %s %s: %s%s" % (self.get_severity_display(), self.logtime, host, self.message)

+
 class AS(models.Model):
     # asno
     crawl = models.ForeignKey(CrawlRun)
@@ -108,7 +113,7 @@ class AS(models.Model):
         self.save()

     def getPeerings(self):
-        return Peering.objects.filter(Q(as1=self)|Q(as2=self))
+        return Peering.objects.filter(Q(as1=self) | Q(as2=self))

     def getAnnouncedPrefixes(self):
         return list(set(map(lambda _x: "%(ip)s/%(prefix)s" % _x, self.announcement_set.all().values('ip', 'prefix'))))
@@ -117,6 +122,7 @@ class AS(models.Model):
         if self.lastSeen:
             return self.lastSeen.startTime.strftime("%d.%m.%Y %H:%I")

+
 class BorderRouter(models.Model):
     # as id, ip, check method, pingable, reachable
     # unique: (crawl_id, asno, as id)
@@ -131,6 +137,7 @@ class BorderRouter(models.Model):
         r = "r" if self.reachable else "!r"
         return "Router %s (AS %s, %s%s)" % (self.routerID, self.AS.number, p, r)

+
 class Announcement(models.Model):
     router = models.ForeignKey(BorderRouter)
@@ -146,6 +153,7 @@ class Announcement(models.Model):
     def __str__(self):
         return "%s/%s via %s (crawl %s)" % (self.ip, self.prefix, self.ASPath, self.router.AS.crawl.pk)

+
 class Peering(models.Model):
     DIRECT = 'direct'
     PATH = 'path'
@@ -159,12 +167,13 @@ class Peering(models.Model):
         ('as1', 'as2'),
     )

     as1 = models.ForeignKey(AS, related_name='peering1')
     as2 = models.ForeignKey(AS, related_name='peering2')
     origin = models.CharField(max_length=10, choices=ORIGIN)

     def __str__(self):
-        return "AS %s <--> AS %s (%s, crawl %s)" % (self.as1.number, self.as2.number, self.get_origin_display(), self.as1.crawl.pk)
+        return "AS %s <--> AS %s (%s, crawl %s)" % (self.as1.number, self.as2.number,
+                                                    self.get_origin_display(), self.as1.crawl.pk)

     def containsAS(self, AS):
         return AS in (self.as1, self.as2)
@@ -177,6 +186,7 @@ class Peering(models.Model):
         except Peering.DoesNotExist:
             return Peering.objects.get(as1=as2, as2=as1)

+
 class BorderRouterPair(models.Model):
     peering = models.ForeignKey(Peering)
     router1 = models.ForeignKey(BorderRouter, default=None, blank=True, null=True, related_name='routerpair1')
@@ -191,3 +201,20 @@ class BorderRouterPair(models.Model):
             return BorderRouterPair.objects.get(peering=peering, router1=router1, router2=router2)
         except BorderRouterPair.DoesNotExist:
             return BorderRouterPair.objects.get(peering=peering, router1=router2, router2=router1)
+
+
+class ASLastSeen(models.Model):
+    asn = models.IntegerField(db_index=True, unique=True)
+    directlyCrawled = models.BooleanField(default=False)
+    online = models.BooleanField()
+    lastSeen = models.DateTimeField(blank=True, null=True)
+    crawlLastSeen = models.ForeignKey(CrawlRun)
+
+    def __str__(self):
+        return ("AS{} {}, last seen {} (crawl {})"
+                .format(self.asn, "online" if self.online else "offline", self.lastSeen, self.crawlLastSeen.pk))
+
+
+class ASLastSeenNeighbor(models.Model):
+    asn = models.IntegerField()
+    neighbor = models.ForeignKey(ASLastSeen)
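A usage sketch for the new last-seen models (hypothetical query; model and field names as defined above):

    # ASes that have dropped off the network, most recently seen first
    for asls in ASLastSeen.objects.filter(online=False).order_by('-lastSeen'):
        neighbors = [n.asn for n in asls.aslastseenneighbor_set.all()]
        print(asls.asn, asls.lastSeen, neighbors)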

bgpdata/urls.py

@@ -4,23 +4,25 @@
 from django.conf.urls import url, include

-from bgpdata.api import ASResource, CrawlResource, BorderRouterResource, AnnouncementResource
+from bgpdata.api import ASResource, CrawlResource, CrawlGraphResource, BorderRouterResource, AnnouncementResource
 from bgpdata import views as bgpdata_views

 asResource = ASResource()
 crawlResource = CrawlResource()
+crawlGraphResource = CrawlGraphResource()
 borderRouterResource = BorderRouterResource()
 announcementResource = AnnouncementResource()

 urlpatterns = (
     url(r'^$', bgpdata_views.overview),
     url(r'^([0-9]+)/$', bgpdata_views.showMap),
+    url(r'^new/([0-9]+|live)/$', bgpdata_views.show_new_map),

-    #url(r'^api/crawl/(?P<crawlID>\d+)/asses/$', 'bgpdata.api.asses'),
-    #(r'^api/', include(asResource.urls)),
+    # url(r'^api/crawl/(?P<crawlID>\d+)/asses/$', 'bgpdata.api.asses'),
+    # (r'^api/', include(asResource.urls)),
     url(r'^api/', include(asResource.urls)),
     url(r'^api/', include(crawlResource.urls)),
+    url(r'^api/', include(crawlGraphResource.urls)),
     url(r'^api/', include(borderRouterResource.urls)),
     url(r'^api/', include(announcementResource.urls)),
 )

bgpdata/views.py

@@ -1,16 +1,19 @@
 # This file is part of dnmapper, an AS--level mapping tool
 # Licensed under GNU General Public License v3 or later
 # Written by Sebastian Lohff (seba@someserver.de)

 from django.shortcuts import render
-from bgpdata.models import CrawlRun, AS, Peering
 from django.core.paginator import Paginator

+from bgpdata.models import CrawlRun, AS, Peering
+from backend import crawler
+

 def overview(request):
     crawls = CrawlRun.objects.order_by("-startTime")
     crawlsPage = Paginator(crawls, 200)
     return render(request, 'bgpdata/overview.html', {"crawls": crawlsPage.page(1)})

+
 def showMap(request, crawlId):
     crawl = None
     try:
@@ -22,3 +25,17 @@ def showMap(request, crawlId):
     peerings = Peering.objects.filter(as1__crawl=crawl)
     return render(request, 'bgpdata/map.html', {"crawl": crawl, 'ASses': ASses, 'peerings': peerings})

+
+def show_new_map(request, crawl_id):
+    crawl = None
+    if crawl_id == 'live':
+        net = crawler.get_current_network()
+        crawl = crawler.make_crawl_from_net(net)
+    else:
+        try:
+            crawl = CrawlRun.objects.get(id=crawl_id)
+        except CrawlRun.DoesNotExist:
+            return render(request, "bgpdata/no-map-found.html", {"crawl_id": crawl_id})
+
+    return render(request, 'bgpdata/new_new_map.html', {"crawl": crawl})

bin/conv.py Executable file (+28)

@@ -0,0 +1,28 @@
#!/usr/bin/env python

import argparse
import os
import sys

sys.path.append("..")
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dnmapper.settings")

import django
django.setup()

from backend.crawler import convert_crawl
from bgpdata.models import CrawlRun


def main():
    parser = argparse.ArgumentParser()
    # required: without a crawl id there is nothing to convert
    parser.add_argument("-c", "--crawl-id", type=int, required=True)
    args = parser.parse_args()

    try:
        crawl = CrawlRun.objects.get(pk=args.crawl_id)
    except CrawlRun.DoesNotExist:
        parser.error("CrawlRun with id {} does not exist".format(args.crawl_id))

    convert_crawl(crawl)


if __name__ == '__main__':
    main()
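Usage sketch (hypothetical crawl id): ./conv.py -c 5, run from inside bin/ so that sys.path.append("..") picks up the project root.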

bin/crawl.py

@@ -19,8 +19,7 @@ from django.utils import timezone
 from django.db.models import Q, Max

 from bgpdata.models import ConfigHost, CrawlRun, CrawlLog, AS, BorderRouter, Announcement, Peering, BorderRouterPair
-from routerparsers import getBGPData, RouterParserException
-from backend import gobgp
+from backend import gobgp, cmk_parser

 # config
 LAST_SEEN_DAYS = 7
@@ -53,13 +52,13 @@ def main():
         print(" -- Getting data for host %s" % host)
         try:
             if host.checkMethod == 'CMK':
-                data = [getBGPData(host.ip, host.number)]
+                data = [cmk_parser.getBGPData(host.ip, host.number)]
             elif host.checkMethod == 'GOBGP':
                 data = gobgp.get_bgp_data(host.ip)
             else:
                 CrawlLog.log(crawl, "Method %s is not currently supported, skipping host" % host.checkMethod, host=host, severity=CrawlLog.ERROR)
                 continue
-        except RouterParserException as e:
+        except cmk_parser.RouterParserException as e:
             msg = "Could not parse data for host: %s" % str(e)
             print("%s: %s" % (host, msg))
             CrawlLog.log(crawl, msg, host=host, severity=CrawlLog.ERROR)

bin/netcrawl.py Executable file (+17)

@@ -0,0 +1,17 @@
#!/usr/bin/env python

import os
import sys

sys.path.append("..")
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dnmapper.settings")

import django
django.setup()

from backend.crawler import crawl


def main():
    crawl()


if __name__ == '__main__':
    main()
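netcrawl.py is presumably meant to be run periodically (e.g. from cron), persisting one CrawlRun per invocation; like conv.py it has to be started from bin/ for the relative project path to resolve.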