dnmapper/bin/crawl.py

232 lines
8.7 KiB
Python
Raw Normal View History

2020-05-31 22:10:26 +02:00
#!/usr/bin/env python
2018-01-19 13:28:52 +01:00
# This file is part of dnmapper, an AS--level mapping tool
# Licensed under GNU General Public License v3 or later
# Written by Sebastian Lohff (seba@someserver.de)
2015-03-23 01:42:31 +01:00
from __future__ import print_function
# prepare environment
import sys
sys.path.append("..")
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dnmapper.settings")
import django
django.setup()
2017-01-14 00:13:45 +01:00
import socket
2015-03-27 15:33:54 +01:00
import datetime
2015-03-23 01:42:31 +01:00
from django.utils import timezone
2015-03-27 15:33:54 +01:00
from django.db.models import Q, Max
2015-03-23 01:42:31 +01:00
from bgpdata.models import ConfigHost, CrawlRun, CrawlLog, AS, BorderRouter, Announcement, Peering, BorderRouterPair
from backend import gobgp, cmk_parser
2020-05-31 22:10:26 +02:00
# config
# Look-back window in days: main() only carries over ASes from earlier crawls
# whose crawl run started at most this many days before the current one.
LAST_SEEN_DAYS = 7
2015-03-23 01:42:31 +01:00
2015-03-27 15:33:54 +01:00
def getOrCreateAS(crawl, number, online=True):
    """Return the AS with the given number for this crawl, creating it if needed.

    crawl  -- crawl run the AS entry belongs to
    number -- AS number to look up
    online -- for a new entry this becomes its ``online`` value; for an
              existing entry a true value triggers ``setOnline()``, while a
              false value leaves the entry untouched
    """
    try:
        found = AS.objects.get(crawl=crawl, number=number)
        if online:
            found.setOnline()
        return found
    except AS.DoesNotExist:
        # first time this AS shows up in the current crawl
        created = AS(crawl=crawl, number=number, online=online)
        created.save()
        return created
2015-03-23 01:42:31 +01:00
def main():
    """Run one complete crawl and store its results in the database.

    Steps:
      1. Create a CrawlRun and log the start.
      2. Fetch BGP data from every ConfigHost using its configured check
         method and hand each returned dataset to parseData(). Hosts whose
         method is unsupported, or that cannot be reached or parsed, are
         logged to CrawlLog and skipped.
      3. Derive additional data: peerings implied by announced AS paths,
         plus ASes (and their peerings) that were last seen online in an
         earlier crawl started within the last LAST_SEEN_DAYS days.
      4. Record the end time and the AS/peering counters on the crawl run.
    """
    # 1. create crawl run
    crawl = CrawlRun()
    crawl.startTime = timezone.now()
    crawl.save()

    CrawlLog.log(crawl, "Starting crawl run!", severity=CrawlLog.INFO)

    # 2. get data from all hosts, put it in the database
    for host in ConfigHost.objects.all():
        crawl.hostsCrawled.add(host)
        data = None
        print(" -- Getting data for host %s" % host)
        try:
            if host.checkMethod == 'CMK':
                # the CMK parser returns a single dataset, wrap it in a list
                data = [cmk_parser.getBGPData(host.ip, host.number)]
            elif host.checkMethod == 'GOBGP':
                data = gobgp.get_bgp_data(host.ip)
            else:
                CrawlLog.log(crawl, "Method %s is not currently supported, skipping host" % host.checkMethod,
                             host=host, severity=CrawlLog.ERROR)
                continue
        except cmk_parser.RouterParserException as e:
            msg = "Could not parse data for host: %s" % str(e)
            print("%s: %s" % (host, msg))
            CrawlLog.log(crawl, msg, host=host, severity=CrawlLog.ERROR)
            continue
        except socket.error as e:
            msg = "Could not reach host: %s" % (e,)
            print("%s: %s" % (host, msg))
            CrawlLog.log(crawl, msg, host=host, severity=CrawlLog.ERROR)
            continue

        for elem in data:
            parseData(crawl, host, elem)

    # 3. calculate missing data
    print(" -- Adding extra data from announcements...")
    # 3.1. use announcement data to find hidden peerings
    for announcement in Announcement.objects.filter(router__AS__crawl=crawl):
        path = announcement.ASPath.split(" ")
        if len(path) > 1:
            firstAS = getOrCreateAS(crawl, path[0])
            for secondASno in path[1:]:
                secondAS = getOrCreateAS(crawl, secondASno)

                # An AS path may list the same AS several times in a row
                # (path prepending); that must not produce a self-peering.
                if secondAS.pk != firstAS.pk:
                    try:
                        Peering.getPeering(firstAS, secondAS)
                    except Peering.DoesNotExist:
                        peering = Peering(as1=firstAS, as2=secondAS, origin=Peering.PATH)
                        peering.save()

                firstAS = secondAS

    # 3.2 add ASses, routers and peerings from old crawlruns (last should suffice)
    print(" --> copy old ASses")
    timerangeStart = crawl.startTime - datetime.timedelta(LAST_SEEN_DAYS)
    # per AS number: newest crawl within the time range in which the AS was
    # online, leaving out ASes that are online in the current crawl
    oldASses = AS.objects.filter(online=True, crawl__startTime__gte=timerangeStart) \
        .values("number").annotate(lastSeen=Max('crawl_id')).filter(~Q(lastSeen=crawl.pk))

    # 3.2.1. copy old asses
    print(" ----> create ASses")
    for oldASdata in oldASses:
        print(" ------> AS", oldASdata["number"])
        oldAS = AS.objects.get(number=oldASdata["number"], crawl=oldASdata["lastSeen"])

        try:
            newAS = AS.objects.get(number=oldAS.number, crawl=crawl)
            if not newAS.online and not newAS.lastSeen:
                newAS.lastSeen = oldAS.crawl
                newAS.save()
        except AS.DoesNotExist:
            # only a missing entry means "create it"; any other error
            # (e.g. a database failure) must not be swallowed here
            newAS = AS(number=oldAS.number, crawl=crawl, lastSeen=oldAS.crawl, directlyCrawled=False, online=False)
            newAS.save()

    # 3.2.2 copy peerings between old asses
    print(" ----> copy peerings")
    for oldASdata in oldASses:
        print(" ------> AS", oldASdata["number"])
        oldAS = AS.objects.get(number=oldASdata["number"], crawl=oldASdata["lastSeen"])
        for oldPeering in oldAS.getPeerings():
            print(" --------> Peering %s <--> %s" % (oldPeering.as1.number, oldPeering.as2.number))
            try:
                newAS1 = AS.objects.get(number=oldPeering.as1.number, crawl=crawl)
                newAS2 = AS.objects.get(number=oldPeering.as2.number, crawl=crawl)
            except AS.DoesNotExist:
                # One end of the old peering has no entry in this crawl
                # (it was neither crawled now nor carried over in 3.2.1).
                # Skip the peering instead of aborting the whole crawl run.
                CrawlLog.log(crawl, "Not copying old peering %s <--> %s: AS missing in current crawl" %
                             (oldPeering.as1.number, oldPeering.as2.number), severity=CrawlLog.WARN)
                continue

            # A peering between two old ASes is returned by getPeerings() of
            # both of them, and it may also have been found in this crawl
            # already; only create it if it does not exist yet.
            try:
                Peering.getPeering(newAS1, newAS2)
            except Peering.DoesNotExist:
                newPeering = Peering(as1=newAS1, as2=newAS2, origin=oldPeering.origin)
                newPeering.save()

    # 3.3 FIXME: do we also want to have old peerings which do not exist anymore?

    # 4. end crawl run
    crawl.endTime = timezone.now()
    crawl.save()

    # additional data
    crawl.asCount = crawl.as_set.count()
    crawl.asOnlineCount = crawl.as_set.filter(online=True).count()
    crawl.asOfflineCount = crawl.as_set.filter(online=False).count()
    crawl.peeringCount = Peering.objects.filter(Q(as1__crawl=crawl) | Q(as2__crawl=crawl)).count()
    crawl.save()

    print(" !! Done")
    CrawlLog.log(crawl, "Crawl completed", severity=CrawlLog.INFO)
2015-03-23 01:42:31 +01:00
2020-05-31 22:10:26 +02:00
def parseData(crawl, host, data):
    """Store one router's BGP dataset in the database for the given crawl.

    crawl -- crawl run all created objects are attached to
    host  -- config host the data came from (only used for log entries)
    data  -- dict with the keys read below:
             "local_as", "local_id": AS number and router id of the router
             "peers": iterable of dicts whose "BGP" entry provides
                      "neighbor_as", "neighbor_id" and "online"
             "routes" (optional): iterable of dicts with "prefix", "path"
                      and "nexthop"

    Creates or updates the AS and BorderRouter of the crawled router, one
    Peering (plus BorderRouterPair if the neighbor id is known) per peer
    and one Announcement per route whose prefix contains a "/".
    """
    print(" -- parsing...")

    currASno = int(data["local_as"])
    currAS = getOrCreateAS(crawl, currASno)
    currAS.directlyCrawled = True
    currAS.save()

    currRouter = None
    try:
        currRouter = BorderRouter.objects.get(AS=currAS, routerID=data["local_id"])
        currRouter.pingable = True
        currRouter.reachable = True
        currRouter.save()
    except BorderRouter.DoesNotExist:
        currRouter = BorderRouter(AS=currAS, routerID=data["local_id"], pingable=True, reachable=True)
        currRouter.save()

    print(" --> peers")
    for peer in data["peers"]:
        # peerings
        # data: BGP{state, neighbor_id, neighbor_as}, description

        # a) find/create neighbor
        neighASno = int(peer["BGP"]["neighbor_as"])
        print(" ----> Peer:", neighASno)
        neighAS = getOrCreateAS(crawl, neighASno, online=peer["BGP"]["online"])

        # b) find out if a peering already exists (maybe where we only need to add our router id?)
        peering = None
        try:
            peering = Peering.getPeering(currAS, neighAS)
        except Peering.DoesNotExist:
            peering = Peering(as1=currAS, as2=neighAS, origin=Peering.DIRECT)
            peering.save()

        # c) look for router/peering pairs
        if peer["BGP"]["neighbor_id"]:
            try:
                neighRouter = BorderRouter.objects.get(AS=neighAS, routerID=peer["BGP"]["neighbor_id"])
            except BorderRouter.DoesNotExist:
                # we only know this router from its peer, so it has not been
                # contacted directly
                neighRouter = BorderRouter(AS=neighAS, routerID=peer["BGP"]["neighbor_id"],
                                           pingable=False, reachable=False)
                neighRouter.save()
            try:
                BorderRouterPair.getPairing(peering, currRouter, neighRouter)
            except BorderRouterPair.DoesNotExist:
                # look for a half-filled pair that already names the
                # neighbor router and complete it with our own router
                pairs = BorderRouterPair.objects.filter(Q(peering=peering) & (Q(router1=neighRouter, router2=None) |
                                                        Q(router1=None, router2=neighRouter)))
                if pairs.count() > 0:
                    pair = pairs[0]
                    if pair.router1 is None:
                        pair.router1 = currRouter
                    else:
                        pair.router2 = currRouter
                    pair.save()
                else:
                    pair = BorderRouterPair(peering=peering, router1=currRouter, router2=neighRouter)
                    pair.save()

    print(" --> Announcements")
    if "routes" in data and data["routes"]:
        for route in data["routes"]:
            print(" ---->", route["prefix"])
            if "/" not in route["prefix"]:
                continue

            # A route with an empty AS path is attributed to the crawled AS
            # itself. originAS needs this per-route default as well:
            # otherwise an empty path raises UnboundLocalError on the first
            # route or silently reuses the origin of the previous route.
            crawlAS = currAS
            originAS = currAS
            if len(route["path"]) > 0:
                route["path"] = list(map(str, route["path"]))
                crawlAS = getOrCreateAS(crawl, route["path"][0])
                originAS = getOrCreateAS(crawl, route["path"][-1])
            ip, prefix = route["prefix"].split("/")
            a = Announcement(router=currRouter, ip=ip, prefix=prefix,
                             ASPath=" ".join(route["path"]), nextHop=route["nexthop"],
                             crawlAS=crawlAS, originAS=originAS)
            a.save()
    else:
        print(" !! No routes found in host output")
        CrawlLog.log(crawl, "No routes found in host output (no bgp feed included?)",
                     host=host, severity=CrawlLog.WARN)
2015-03-23 01:42:31 +01:00
# Start a crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()