dnmapper/bin/crawl.py

215 lines
7.0 KiB
Python
Raw Normal View History

2015-03-23 01:42:31 +01:00
#!/usr/bin/env python2
from __future__ import print_function
2015-03-27 16:43:47 +01:00
# config
LAST_SEEN_DAYS = 7
2015-03-23 01:42:31 +01:00
# prepare environment
import sys
sys.path.append("..")
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dnmapper.settings")
import django
django.setup()
2017-01-14 00:13:45 +01:00
import socket
2015-03-27 15:33:54 +01:00
import datetime
2015-03-23 01:42:31 +01:00
from django.utils import timezone
2015-03-27 15:33:54 +01:00
from django.db.models import Q, Max
2015-03-23 01:42:31 +01:00
from bgpdata.models import ConfigHost, CrawlRun, CrawlLog, AS, BorderRouter, Announcement, Peering, BorderRouterPair
from routerparsers import getBGPData, RouterParserException
2015-03-27 15:33:54 +01:00
def getOrCreateAS(crawl, number, online=True):
2015-03-23 01:42:31 +01:00
currAS = None
try:
currAS = AS.objects.get(crawl=crawl, number=number)
2015-03-27 16:43:47 +01:00
if online:
currAS.setOnline()
2015-03-23 01:42:31 +01:00
except AS.DoesNotExist:
2015-03-27 15:33:54 +01:00
currAS = AS(crawl=crawl, number=number, online=online)
2015-03-23 01:42:31 +01:00
currAS.save()
return currAS
def main():
# 1. create crawl run
crawl = CrawlRun()
crawl.startTime = timezone.now()
crawl.save()
CrawlLog.log(crawl, "Starting crawl run!", severity=CrawlLog.INFO)
# 2. get data from all hosts, put it in the database
for host in ConfigHost.objects.all():
crawl.hostsCrawled.add(host)
2015-03-23 01:42:31 +01:00
data = None
print(" -- Getting data for host %s" % host)
try:
if host.checkMethod == 'CMK':
data = getBGPData(host.ip, host.number)
else:
CrawlLog.log(crawl, "Method %s is not currently supported, skipping host" % host.checkMethod, host=host, severity=CrawlLog.ERROR)
continue
except RouterParserException as e:
msg = "Could not parse data for host: %s" % str(e)
print("%s: %s" % (host, msg))
2017-01-14 00:13:45 +01:00
CrawlLog.log(crawl, msg, host=host, severity=CrawlLog.ERROR)
continue
except socket.error as e:
msg = "Could not reach host: %s" % (e,)
print("%s: %s" % (host, msg))
2015-03-23 01:42:31 +01:00
CrawlLog.log(crawl, msg, host=host, severity=CrawlLog.ERROR)
continue
print(" -- parsing...")
currASno = int(data["local_as"])
currAS = getOrCreateAS(crawl, currASno)
2015-03-27 15:33:54 +01:00
currAS.directlyCrawled = True
currAS.save()
2015-03-23 01:42:31 +01:00
currRouter = None
try:
currRouter = BorderRouter.objects.get(AS=currAS, routerID=data["local_id"])
currRouter.pingable = True
currRouter.reachable = True
currRouter.save()
except BorderRouter.DoesNotExist:
currRouter = BorderRouter(AS=currAS, routerID=data["local_id"], pingable=True, reachable=True)
currRouter.save()
print(" --> peers")
for peer in data["peers"]:
# peerings
# data: BGP{state, neighbor_id, neighbor_as}, description
# a) find/create neighbor
print(" ----> Peer:", int(peer["BGP"]["neighbor_as"]))
2015-03-27 16:43:47 +01:00
neighAS = getOrCreateAS(crawl, int(peer["BGP"]["neighbor_as"]), online=peer["BGP"]["online"])
2015-03-23 01:42:31 +01:00
# b) find out if a peering already exists (maybe where we only need to add our router id?)
peering = None
try:
peering = Peering.getPeering(currAS, neighAS)
except Peering.DoesNotExist:
peering = Peering(as1=currAS, as2=neighAS, origin=Peering.DIRECT)
peering.save()
# c) look for router/peering pairs
if peer["BGP"]["neighbor_id"]:
try:
neighRouter = BorderRouter.objects.get(AS=neighAS, routerID=peer["BGP"]["neighbor_id"])
except BorderRouter.DoesNotExist:
neighRouter = BorderRouter(AS=neighAS, routerID=peer["BGP"]["neighbor_id"], pingable=False, reachable=False)
2015-03-23 01:42:31 +01:00
neighRouter.save()
try:
BorderRouterPair.getPairing(peering, currRouter, neighRouter)
except BorderRouterPair.DoesNotExist:
pairs = BorderRouterPair.objects.filter(Q(peering=peering) & (Q(router1=neighRouter, router2=None)|Q(router1=None, router2=neighRouter)))
if pairs.count() > 0:
pair = pairs[0]
if pair.router1 == None:
pair.router1 = currRouter
else:
pair.router2 = currRouter
pair.save()
else:
pair = BorderRouterPair(peering=peering, router1=currRouter, router2=neighRouter)
pair.save()
print(" --> Announcements")
if "routes" in data and data["routes"]:
for route in data["routes"]:
print(" ---->", route["prefix"])
if "/" not in route["prefix"]:
continue
2015-03-27 15:33:54 +01:00
crawlAS = currAS
2015-03-27 15:33:54 +01:00
if len(route["path"]) > 0:
crawlAS = getOrCreateAS(crawl, route["path"][0])
originAS = getOrCreateAS(crawl, route["path"][-1])
2015-03-23 01:42:31 +01:00
ip, prefix = route["prefix"].split("/")
a = Announcement(router=currRouter, ip=ip, prefix=prefix,
ASPath=" ".join(route["path"]), nextHop=route["nexthop"],
crawlAS=crawlAS, originAS=originAS)
2015-03-23 01:42:31 +01:00
a.save()
else:
print(" !! No routes found in host output")
CrawlLog.log(crawl, "No routes found in host output (no bgp feed included?)", host=host, severity=CrawlLog.WARN)
# 3. calculate missing data
print(" -- Adding extra data from announcements...")
# 3.1. use announcement data to find hidden peerings
for announcement in Announcement.objects.filter(router__AS__crawl=crawl):
path = announcement.ASPath.split(" ")
if len(path) > 1:
firstASno = path.pop(0)
firstAS = getOrCreateAS(crawl, firstASno)
while len(path) > 0:
secondASno = path.pop(0)
secondAS = getOrCreateAS(crawl, secondASno)
try:
Peering.getPeering(firstAS, secondAS)
except Peering.DoesNotExist:
peering = Peering(as1=firstAS, as2=secondAS, origin=Peering.PATH)
peering.save()
firstAS = secondAS
2015-03-27 15:33:54 +01:00
# 3.2 add ASses, routers and peerings from old crawlruns (last should suffice)
# find
print(" --> copy old ASses")
2015-03-27 16:43:47 +01:00
timerangeStart = crawl.startTime - datetime.timedelta(LAST_SEEN_DAYS)
oldASses = AS.objects.filter(online=True, crawl__startTime__gte=timerangeStart).values("number").annotate(lastSeen=Max('crawl_id')).filter(~Q(lastSeen=crawl.pk))
2015-03-27 15:33:54 +01:00
# 3.2.1. copy old asses
print(" ----> create ASses")
for oldASdata in oldASses:
print(" ------> AS", oldASdata["number"])
oldAS = AS.objects.get(number=oldASdata["number"], crawl=oldASdata["lastSeen"])
2015-03-27 16:43:47 +01:00
try:
newAS = AS.objects.get(number=oldAS.number, crawl=crawl)
if not newAS.online and not newAS.lastSeen:
newAS.lastSeen = oldAS.crawl
newAS.save()
except:
newAS = AS(number=oldAS.number, crawl=crawl, lastSeen=oldAS.crawl, directlyCrawled=False, online=False)
newAS.save()
2015-03-27 15:33:54 +01:00
# 3.2.2 copy peerings between old asses
print(" ----> copy peerings")
for oldASdata in oldASses:
print(" ------> AS", oldASdata["number"])
oldAS = AS.objects.get(number=oldASdata["number"], crawl=oldASdata["lastSeen"])
for peering in oldAS.getPeerings():
print(" --------> Peering %s <--> %s" % (peering.as1.number, peering.as2.number))
peering = Peering(
as1=AS.objects.get(number=peering.as1.number, crawl=crawl),
as2=AS.objects.get(number=peering.as2.number, crawl=crawl),
origin=peering.origin)
peering.save()
# 3.3 FIXME: do we also want to have old peerings which do not exist anymore?
2015-03-23 01:42:31 +01:00
# 4. end crawl run
crawl.endTime = timezone.now()
crawl.save()
# additional data
crawl.asCount = crawl.as_set.count()
crawl.asOnlineCount = crawl.as_set.filter(online=True).count()
crawl.asOfflineCount = crawl.as_set.filter(online=False).count()
crawl.peeringCount = Peering.objects.filter(Q(as1__crawl=crawl)|Q(as2__crawl=crawl)).count()
crawl.save()
2015-03-23 01:42:31 +01:00
print(" !! Done")
CrawlLog.log(crawl, "Crawl completed", severity=CrawlLog.INFO)
if __name__ == '__main__':
main()