From 4f4205568a59dd1720409c76dd33e2e9a5a22b7b Mon Sep 17 00:00:00 2001 From: Sebastian Lohff Date: Sun, 25 Dec 2011 23:01:50 +0100 Subject: [PATCH] Fixed DPHelper, now with encoding --- tunnel/rfc1149/phelper.py | 312 ++++++++++++++++++++++++++++++++++++++ tunnel/rfc1149/rfc1149.py | 38 +++-- 2 files changed, 330 insertions(+), 20 deletions(-) create mode 100644 tunnel/rfc1149/phelper.py diff --git a/tunnel/rfc1149/phelper.py b/tunnel/rfc1149/phelper.py new file mode 100644 index 0000000..f84596c --- /dev/null +++ b/tunnel/rfc1149/phelper.py @@ -0,0 +1,312 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import bitarray +import random +import re +import urllib +from HTMLParser import HTMLParser + +class PHelper(): + """ Packaging Helper baseclass """ + @staticmethod + def intToBits(n, length): + """ Convert the number n to a bitarray of length. """ + i = 0 + c = 1 + ret = [False] * length + while i < length: + ret[length-(i+1)] = (n & c) > 0 + i += 1 + c <<= 1 + return ret + + @staticmethod + def bitsToInt(bits): + ret = 0 + for i in bits: + ret = (ret << 1) | (i and 1 or 0) + return ret + + @staticmethod + def toBin(val, leading): + """ Binary formatter for python versions without str.format() """ + x = bin(val).replace("0b", "") + return "0" * (leading-len(x)) + x + + @staticmethod + def encode(data): + """ Encode data, return list of messages (max. 140 chars long) """ + raise NotImplementedError("You need to implement this method when subclassing.") + + @staticmethod + def decode(packet): + """ Decode packet, return tuple(isFragmented boolean), packetLength int, packetId int, data (byte)string) """ + raise NotImplementedError("You need to implement this method when subclassing.") + + +class UPHelper(PHelper): + """ The Unicode Packaging Helper + + Twitter supports 140 chars, while a char can be a unicode + character. For a unicode character there are 2^20 possibilities. + For the sake of lazyness we put two bytes in each character, using + only 2^16. The remaining 4 bits can be used for metadata or whatever. + + The header in the metadata is as following: + + <9 bits length of payload> + <32 bit random paket id greater than 0>""" + + + @staticmethod + def encode(data): + """ Generate list of packets with a header from data. """ + packetId = random.randint(1, 2**32) + fragments = [] + while len(data) >= 280: + newData = data[0:280] + if newData[-1] == '\x00' and newData[-2] == '\x00' and len(newData) == 280: + fragments.append(data[0:278]) + data = data[278:] + else: + fragments.append(newData) + data = data[280:] + if len(data) > 0: + fragments.append(data) + + # convert to twitter message + for y in range(len(fragments)): + fragment = fragments[y] + lenX = len(fragment) + # pad packet if it is not long enouth / not aligned + if len(fragment) < 2*11: + fragment = fragment + "-" * (2*11-len(fragment)) + if len(fragment) % 2 == 1: + fragment += "-" + + # write header (bits: 1 fragment, 9 length, 32 id) + header = bitarray.bitarray(1) + # write fragment-bit + header[0] = not (y+1 == len(fragments)) + # append packet length + header.extend(UPHelper.intToBits(lenX, 9)) + # add packet id + header.extend(UPHelper.intToBits(packetId, 32)) + # padding to complete last 4 bytes + header.extend([False, False]) + + i = 0 + h = 0 + ret = "" + while i+1 < len(fragment): + val = ord(fragment[i]) << 8 | ord(fragment[i+1]) + if h < 11: + val |= UPHelper.bitsToInt(header[h*4:(h+1)*4]) << 16 + h += 1 + # hack for (0x2026, 0x202f) + if val > 0x2026 and val < 0x202f: + val = val | (1<<16) + ret += unichr(val) + i += 2 + + # if the last characters are multiple \x00-bytes, twitter eats them! + # we already took care so there is space at the end for an extra dot + if ret[-1] == '\x00': + ret = ret + "." + fragments[y] = ret + return fragments + + @staticmethod + def reassembleBrokenChars(packet): + """ Reassemble broken characters back to unicode. + + Twitter breaks some characters (currently known range is 0xd800 - 0xdfff) + into r"\XXX\XXX\XXX", X being octal numbers. These are actually strings, + so one unicodechar from the range gets broken up to 12 chars. + + Note that through this packets containing the following byte sequence will + get mangled: + 00 5c 00 XX 00 XX 00 XX 00 5c 00 XX 00 XX 00 XX 00 5c 00 XX 00 XX 00 XX + while XX is a number from ord('0') to ord('9') + + Also _some_ of these are again converted into other chars. + \ud800\udc00 gets converted to \U00010000, so we need to guess-convert + these back. """ + origPacket = packet + brokenChars = re.findall(r"(\\([0-9]{3})\\([0-9]{3})\\([0-9]{3}))", packet) + for broken in brokenChars: + #print "broken", broken, repr("".join(map(lambda x: chr(int(x, 8)), broken[1:]))) + newChar = "".join(map(lambda x: chr(int(x, 8)), broken[1:])).decode("utf-8") + packet = packet.replace(broken[0], newChar) + + # this is guesswork-derivation, its derived from these lines + # they represent our input and twitters output + # guesswork++: for the header, this is plausible, afterwards not. + # u"\ud900\udc00 \uda00\udcFF \udb00\uddFF" + # u'\U00050000 \U000900ff \U000d01ff" + # u"\ud800\udc00 \ud800\udcFF \ud800\uddFF \ud800\ude00 \ud800\udeff \ud800\udf00 \ud800\udfff" + # u'\U00010000 \U000100ff \U000101ff \U00010200 \U000102ff \U00010300 \U000103ff' + # u"\ud800\udc00 \ud801\udc00 \ud802\udc00 \ud803\udc00 \ud804\udc00 \ud805\udc00\ud806\udc00 \ud807\udc00 \ud808\udc00 \ud809\udc00 \ud80a\udc00 \ud80b\udc00 \ud80c\udc00 \ud80d\udc00 \ud80e\udc00 \ud80f\udc00 \ud810\udc00 \ud811\udc00 \ud812\udc00 \ud813\udc00" + # u'\U00010000 \U00010400 \U00010800 \U00010c00 \U00011000 \U00011400\U00011800 \U00011c00 \U00012000 \U00012400 \U00012800 \U00012c00 \U00013000 \U00013400 \U00013800 \U00013c00 \U00014000 \U00014400 \U00014800 \U00014c00' + + + for c in origPacket[11:]: + o = ord(c) + # (0x2027, 0x202f) are not displayed properly + if o > 0x12027 and o < 0x1202f: + packet.replace(c, unichr(o & (0xFFFF))) + elif o > 65535: + # -.- + a = unichr(0xd800 + ((o >> 10) - 64)) + b = unichr(0xdc00 + (o & 1023)) + packet = packet.replace(c, a+b) + return packet + + @staticmethod + def decode(packet): + """ Decodes an unicodestring (packet) back to header + data + + Returns: tupel(isFragmented, packetLen, packetId, data) """ + + packet = UPHelper.reassembleBrokenChars(packet) + + if len(packet) < 11: + raise ValueError("This is not a valid packet, header is too short (should be at least 11, is %d)" % len(packet)) + header = bitarray.bitarray() + for i in range(11): + # format with binary is not understood by older python versions + #header.extend("{:04b}".format(ord(packet[i]) >> 16)) + header.extend(UPHelper.toBin((ord(packet[i]) >> 16), 4)) + isFragmented = header[0] + + packetLen = UPHelper.bitsToInt(header[1:10]) + packetId = UPHelper.bitsToInt(header[10:]) + if packetId == 0: + raise ValueError("Packet id cannot be 0") + + rawData = map(lambda x: ord(x) & 0xFFFF, packet) + data = [] + for p in rawData: + data.append(chr((p >> 8) & 255)) + data.append(chr(p & 255)) + data = "".join(data[:packetLen]) + return (isFragmented, packetLen, packetId, data) + +class DPHelper(PHelper): + """ The Dump Packaging Helper + + As twitters unicodehandling is quiet random, this is an + attempt to a more reliable en-/decoding. It uses + the first char for an unicode header, the remaining 138 + chars are used for the networkpacket in plain. + + Header: + <1 bit, True if fragment> + <8 bit, lenght of packet> + <11 bit, packet id [1, 2047]> + """ + + @staticmethod + def textEncode(t): + return t.replace("&", "&") \ + .replace("\r", " ") + + @staticmethod + def textDecode(t): + return t.replace("<", "<") \ + .replace(">", ">") \ + .replace(" ", "\r")\ + .replace("&", "&") \ + + @staticmethod + def encode(data): + # twitter encodes <>'s ==> we need to encode & to distinguish between < and an encoded etc. + data = DPHelper.textEncode(data) + packetId = random.randint(1, 2**11) + fragments = [] + while len(data) >= 139: + newData = data[0:139] + if newData[-1] == '\x00' and newData[-2] == '\x00' and len(newData) == 139: + fragments.append(data[0:138]) + data = data[138:] + else: + fragments.append(newData) + data = data[139:] + if len(data) > 0: + fragments.append(data) + + # convert to twitter message + for y in range(len(fragments)): + fragment = fragments[y] + lenX = len(fragment) + + # write header (bits: 1 fragment, 8 length, 11 id) + header = bitarray.bitarray(1) + # write fragment-bit + header[0] = not (y+1 == len(fragments)) + # append packet length + header.extend(DPHelper.intToBits(lenX, 8)) + # add packet id + header.extend(DPHelper.intToBits(packetId, 11)) + ret = unichr(DPHelper.bitsToInt(header)) + "".join([unichr(ord(i)) for i in fragment]) + + # if the last characters are multiple \x00-bytes, twitter eats them! + # we already took care so there is space at the end for an extra dot + if ret[-1] == '\x00' and ret[-2] == '\x00': + ret = ret + "." + fragments[y] = ret + return fragments + + @staticmethod + def decode(packet): + """ Decodes an unicodestring (packet) back to header + data + + Returns: tupel(isFragmented, packetLen, packetId, data) """ + + # twitter urlencodes < and > to < and ≷ + # this will of course break packets containing an actual < or > + packet = DPHelper.textDecode(packet) + #packet = packet.replace("<", "<").replace(">", ">").replace("&", "&") + + if len(packet) < 2: + raise ValueError("This is not a valid packet, header is too short (should be at least 11, is %d)" % len(packet)) + header = bitarray.bitarray(DPHelper.toBin(ord(packet[0]), 20)) + isFragmented = header[0] + + packetLen = DPHelper.bitsToInt(header[1:9]) + packetId = DPHelper.bitsToInt(header[9:]) + if packetId == 0: + raise ValueError("Packet id cannot be 0") + + data = "".join(map(lambda x: chr(ord(x)), packet[1:packetLen+1])) + return (isFragmented, packetLen, packetId, data) + +if __name__ == '__main__': + msg = '\x00\x00\x08\x00E\x00\x00T\x00\x00@\x00@\x01\x12\x81\n\n\n\x0b\n\n\n\n\x08\x00\xd7Gt\xd2\x00\x01[U\xf1Nl=\x08\x00\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./01234567' + enc = DPHelper.encode(msg) + print "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + print "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + print "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + print repr(enc) + ret = "" + for e in enc: + e = DPHelper.decode(e) + ret += e[3] + print "broken", repr(DPHelper.encode(msg)[0]) + m = DPHelper.decode(DPHelper.encode(msg)[0]) + print repr(msg) + print repr(ret) + if ret == msg: + print "success" + else: + print "failure" + print "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + print "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + print "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + print DPHelper.decode(DPHelper.encode("X"*281)[0]) + print DPHelper.decode(DPHelper.encode("X"*281)[1]) + print DPHelper.decode(DPHelper.encode("X"*281)[2]) + print repr(DPHelper.encode("foo & < \r\r > and <")[0].replace("<", "<").replace(">", ">")) + print DPHelper.decode(DPHelper.encode("foo & < \r\r > and <")[0].replace("<", "<").replace(">", ">")) + diff --git a/tunnel/rfc1149/rfc1149.py b/tunnel/rfc1149/rfc1149.py index 6089ade..deef9b6 100755 --- a/tunnel/rfc1149/rfc1149.py +++ b/tunnel/rfc1149/rfc1149.py @@ -9,40 +9,33 @@ import sys import threading import time import tweepy -from uphelper import UPHelper sys.path.append("../../../") from conf import Conf from ether2any import Ether2Any -from ether2any.helper import getDstMacFromPkt, isBroadcast, binToHexStr class TwittStreamHandler(tweepy.StreamListener): - def __init__(self, dev, debug=False): + def __init__(self, dev, coder, debug=False): super(TwittStreamHandler, self).__init__() self.dev = dev self.fragments = collections.defaultdict(str) self.debug = debug + self.coder = coder def on_status( self, status ): """ On statis, decode and reassemble packet-status-texts. If complete, write them to the tun-dev. """ - # Twitter breaks some of the unicode characters, so we need to reassemble them. - # Note that through this packets containing the following bytes will get mangled: - # 00 5c 00 XX 00 XX 00 XX 00 5c 00 XX 00 XX 00 XX 00 5c 00 XX 00 XX 00 XX - # while XX is a number from ord('0') to ord('9') - sourcePacket = UPHelper.reassembleBrokenChars(status.text) + sourcePacket = status.text if self.debug: print "in uni:", repr(sourcePacket) (isFragment, packetLen, packetId, packet) = None, None, None, None try: - (isFragment, packetLen, packetId, packet) = UPHelper.decode(sourcePacket) + (isFragment, packetLen, packetId, packet) = self.coder.decode(sourcePacket) except ValueError, e: print "Could not decode tweet, omitting (Error was: %s).\n\tText was: %s" % (e, repr(sourcePacket)) raise return - #print "Parsed packet:", (isFragment, packetLen, packetId) - #print "\t contents:", packet if isFragment: self.fragments[packetId] += packet print " >+ Added fragment with id", packetId @@ -52,10 +45,13 @@ class TwittStreamHandler(tweepy.StreamListener): toSend = self.fragments[packetId] + packet else: toSend = packet - print " >> Received packet with id", packetId - if self.debug: - print repr(toSend) - self.dev.write(toSend) + if toSend == '': + print " >! Received packet with id", packetId, "is BROKEN" + else: + print " >> Received packet with id", packetId + if self.debug: + print repr(toSend) + self.dev.write(toSend) def on_limit(self, track): print "We got limited ", track @@ -78,17 +74,18 @@ class TwittStreamHandler(tweepy.StreamListener): # TODO: Thread is not needed, tweepy has its own threading. remove it class DownstreamThread(threading.Thread): - def __init__(self, dev, auth, endpoint, debug=False): + def __init__(self, dev, coder, auth, endpoint, debug=False): threading.Thread.__init__(self) self.debug = debug self.auth = auth + self.coder = coder self.api = tweepy.API(self.auth) self.endpoint = endpoint self.dev = dev self.daemon = True def run(self): - stream = tweepy.Stream(auth=self.auth, listener=TwittStreamHandler(self.dev, self.debug)) + stream = tweepy.Stream(auth=self.auth, listener=TwittStreamHandler(self.dev, self.coder, self.debug)) user = self.api.get_user(self.endpoint) print "Endpoint is", self.endpoint, "with id", user.id stream.filter([user.id]) @@ -99,6 +96,7 @@ class RFC1149(Ether2Any): self.debug = debug network = Conf.get("network", {'mtu': 1400}) + self.coder = Conf.get("coder", None) self.twitterConf = Conf.get("twitter", None) self.endpoint = self.twitterConf['endpoint'] if not self.endpoint: @@ -108,7 +106,7 @@ class RFC1149(Ether2Any): self.dev.up() self._setupTwitter() - self.downstream = DownstreamThread(dev=self.dev, auth=self.auth, endpoint=self.endpoint, debug=self.debug) + self.downstream = DownstreamThread(dev=self.dev, coder=self.coder, auth=self.auth, endpoint=self.endpoint, debug=self.debug) self.downstream.start() def _requestTwitterTokens(self): @@ -154,7 +152,7 @@ if not Conf['twitter']['ACCESS_KEY']: return "".join(map(lambda x: chr(x+48), reversed(s))) def sendToNet(self, packet): - fragments = UPHelper.encode(packet) + fragments = self.coder.encode(packet) if self.debug: print "out raw:", repr(packet) print "out frag:", repr(fragments) @@ -180,7 +178,7 @@ if not Conf['twitter']['ACCESS_KEY']: print " >! ERROR - Either connection refused or limited. Not sent." if __name__ == '__main__': - rfc = RFC1149() + rfc = RFC1149(debug=True) print "Starting RFC1149 ip-over-twitter service..." rfc.run()