dnmapper/bin/crawl.py

#!/usr/bin/env python2
from __future__ import print_function
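
# Crawl all configured hosts, parse their BGP state and store the resulting
# ASes, border routers, peerings and announcements as a single CrawlRun.
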
# config
LAST_SEEN_DAYS = 7

# prepare environment
import sys
sys.path.append("..")
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dnmapper.settings")
import django
django.setup()

import socket
import datetime

from django.utils import timezone
from django.db.models import Q, Max

from bgpdata.models import ConfigHost, CrawlRun, CrawlLog, AS, BorderRouter, Announcement, Peering, BorderRouterPair
from routerparsers import getBGPData, RouterParserException
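

# Return the AS entry for this AS number within the given crawl, creating it if
# it does not exist yet; an existing entry is marked online when requested.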
def getOrCreateAS(crawl, number, online=True):
    currAS = None
    try:
        currAS = AS.objects.get(crawl=crawl, number=number)
        if online:
            currAS.setOnline()
    except AS.DoesNotExist:
        currAS = AS(crawl=crawl, number=number, online=online)
        currAS.save()

    return currAS


def main():
    # 1. create crawl run
    crawl = CrawlRun()
    crawl.startTime = timezone.now()
    crawl.save()

    CrawlLog.log(crawl, "Starting crawl run!", severity=CrawlLog.INFO)

    # 2. get data from all hosts, put it in the database
    for host in ConfigHost.objects.all():
        crawl.hostsCrawled.add(host)
        data = None
        print(" -- Getting data for host %s" % host)
        try:
            if host.checkMethod == 'CMK':
                data = getBGPData(host.ip, host.number)
            else:
                CrawlLog.log(crawl, "Method %s is not currently supported, skipping host" % host.checkMethod, host=host, severity=CrawlLog.ERROR)
                continue
        except RouterParserException as e:
            msg = "Could not parse data for host: %s" % str(e)
            print("%s: %s" % (host, msg))
            CrawlLog.log(crawl, msg, host=host, severity=CrawlLog.ERROR)
            continue
        except socket.error as e:
            msg = "Could not reach host: %s" % (e,)
            print("%s: %s" % (host, msg))
            CrawlLog.log(crawl, msg, host=host, severity=CrawlLog.ERROR)
            continue

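        # data now holds the parsed router state: local_as, local_id, peers and (optionally) routes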
print(" -- parsing...")
currASno = int(data["local_as"])
currAS = getOrCreateAS(crawl, currASno)
currAS.directlyCrawled = True
currAS.save()
currRouter = None
try:
currRouter = BorderRouter.objects.get(AS=currAS, routerID=data["local_id"])
currRouter.pingable = True
currRouter.reachable = True
currRouter.save()
except BorderRouter.DoesNotExist:
currRouter = BorderRouter(AS=currAS, routerID=data["local_id"], pingable=True, reachable=True)
currRouter.save()
print(" --> peers")
for peer in data["peers"]:
# peerings
# data: BGP{state, neighbor_id, neighbor_as}, description
# a) find/create neighbor
print(" ----> Peer:", int(peer["BGP"]["neighbor_as"]))
neighAS = getOrCreateAS(crawl, int(peer["BGP"]["neighbor_as"]), online=peer["BGP"]["online"])
# b) find out if a peering already exists (maybe where we only need to add our router id?)
peering = None
try:
peering = Peering.getPeering(currAS, neighAS)
except Peering.DoesNotExist:
peering = Peering(as1=currAS, as2=neighAS, origin=Peering.DIRECT)
peering.save()
# c) look for router/peering pairs
if peer["BGP"]["neighbor_id"]:
try:
neighRouter = BorderRouter.objects.get(AS=neighAS, routerID=peer["BGP"]["neighbor_id"])
except BorderRouter.DoesNotExist:
neighRouter = BorderRouter(AS=neighAS, routerID=peer["BGP"]["neighbor_id"], pingable=False, reachable=False)
neighRouter.save()
try:
BorderRouterPair.getPairing(peering, currRouter, neighRouter)
except BorderRouterPair.DoesNotExist:
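                    # a pairing may already exist with only the neighbour router recorded;
                    # complete its empty side with our router instead of creating a duplicate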
                    pairs = BorderRouterPair.objects.filter(Q(peering=peering) & (Q(router1=neighRouter, router2=None) | Q(router1=None, router2=neighRouter)))
                    if pairs.count() > 0:
                        pair = pairs[0]
                        if pair.router1 is None:
                            pair.router1 = currRouter
                        else:
                            pair.router2 = currRouter
                        pair.save()
                    else:
                        pair = BorderRouterPair(peering=peering, router1=currRouter, router2=neighRouter)
                        pair.save()

print(" --> Announcements")
if "routes" in data and data["routes"]:
for route in data["routes"]:
print(" ---->", route["prefix"])
if "/" not in route["prefix"]:
continue
originAS = currAS
if len(route["path"]) > 0:
originAS = getOrCreateAS(crawl, route["path"][0])
ip, prefix = route["prefix"].split("/")
a = Announcement(router=currRouter, ip=ip, prefix=prefix,
ASPath=" ".join(route["path"]), nextHop=route["nexthop"],
originAS=originAS)
a.save()
else:
print(" !! No routes found in host output")
CrawlLog.log(crawl, "No routes found in host output (no bgp feed included?)", host=host, severity=CrawlLog.WARN)
    # 3. calculate missing data
    print(" -- Adding extra data from announcements...")

    # 3.1. use announcement data to find hidden peerings
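    # (every pair of adjacent AS numbers on an announcement's AS path implies a
    # peering, even if neither AS was crawled directly)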
    for announcement in Announcement.objects.filter(router__AS__crawl=crawl):
        path = announcement.ASPath.split(" ")
        if len(path) > 1:
            firstASno = path.pop(0)
            firstAS = getOrCreateAS(crawl, firstASno)
            while len(path) > 0:
                secondASno = path.pop(0)
                secondAS = getOrCreateAS(crawl, secondASno)
                try:
                    Peering.getPeering(firstAS, secondAS)
                except Peering.DoesNotExist:
                    peering = Peering(as1=firstAS, as2=secondAS, origin=Peering.PATH)
                    peering.save()
                firstAS = secondAS

    # 3.2 add ASses, routers and peerings from old crawl runs (the last one should suffice)
    # find
    print(" --> copy old ASses")
    timerangeStart = crawl.startTime - datetime.timedelta(LAST_SEEN_DAYS)
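    # AS numbers seen online within LAST_SEEN_DAYS, annotated with the most recent
    # crawl they appeared in, excluding those already part of the current crawl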
    oldASses = AS.objects.filter(online=True, crawl__startTime__gte=timerangeStart).values("number").annotate(lastSeen=Max('crawl_id')).filter(~Q(lastSeen=crawl.pk))

    # 3.2.1. copy old asses
    print(" ----> create ASses")
    for oldASdata in oldASses:
        print(" ------> AS", oldASdata["number"])
        oldAS = AS.objects.get(number=oldASdata["number"], crawl=oldASdata["lastSeen"])
        try:
            newAS = AS.objects.get(number=oldAS.number, crawl=crawl)
            if not newAS.online and not newAS.lastSeen:
                newAS.lastSeen = oldAS.crawl
                newAS.save()
        except AS.DoesNotExist:
            newAS = AS(number=oldAS.number, crawl=crawl, lastSeen=oldAS.crawl, directlyCrawled=False, online=False)
            newAS.save()

    # 3.2.2 copy peerings between old asses
    print(" ----> copy peerings")
    for oldASdata in oldASses:
        print(" ------> AS", oldASdata["number"])
        oldAS = AS.objects.get(number=oldASdata["number"], crawl=oldASdata["lastSeen"])
        for peering in oldAS.getPeerings():
            print(" --------> Peering %s <--> %s" % (peering.as1.number, peering.as2.number))
            peering = Peering(
                as1=AS.objects.get(number=peering.as1.number, crawl=crawl),
                as2=AS.objects.get(number=peering.as2.number, crawl=crawl),
                origin=peering.origin)
            peering.save()

    # 3.3 FIXME: do we also want to have old peerings which do not exist anymore?

    # 4. end crawl run
    crawl.endTime = timezone.now()
    crawl.save()

    # additional data
    crawl.asCount = crawl.as_set.count()
    crawl.asOnlineCount = crawl.as_set.filter(online=True).count()
    crawl.asOfflineCount = crawl.as_set.filter(online=False).count()
    crawl.peeringCount = Peering.objects.filter(Q(as1__crawl=crawl) | Q(as2__crawl=crawl)).count()
    crawl.save()

    print(" !! Done")
    CrawlLog.log(crawl, "Crawl completed", severity=CrawlLog.INFO)


if __name__ == '__main__':
    main()