You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

442 lines
16 KiB

#!/usr/bin/python
""" wtbase - what the base
Provides classes to encode text to arbitrary bases (base 2^n supported) and
then to textual forms. """
import math
import random
class DecodingException(Exception):
pass
class Token():
""" A Token contains a word/sentence and a list of tokenlists which can follow the word. """
def __init__(self, word, nextLists):
self.word = word
self.nextLists = nextLists
def __str__(self):
return "Token: word \"%s\"" % self.word
class TextGenerator():
""" Basis generator to en- and decode text-bits. """
def __init__(self, base=16):
self.lists = {}
self.base = base
self.baseBit = math.log(base, 2)
if self.baseBit != int(self.baseBit):
raise ValueError("base must be a power of 2")
self.baseBit = int(self.baseBit)
self.startList = "initial"
def addList(self, identifier, newList):
self.lists[identifier] = newList
def getList(self, identifier):
return self.lists.get(identifier, None)
def convToBits(self, data):
""" Converts a data string into n-bit parts defined by self.base.
Returns a list of integers. """
l = []
bit = 0
rest = 0
for c in data:
n = ord(c)
if bit != 0:
nc = rest | ((n & ((1 << bit) - 1 << 8-bit)) >> 8-bit)
l.append(nc)
while 8-bit >= self.baseBit:
nc = (n & (((1 << self.baseBit)-1) << 8-bit-self.baseBit)) >> (8-bit-self.baseBit)
l.append(nc)
bit += self.baseBit
rest = (n & (1 << 8-bit) - 1) << self.baseBit-(8-bit)
bit = (bit+self.baseBit) % 8 % self.baseBit
if bit != 0:
l.append(rest)
return l
def convToNums(self, data):
""" Reassemble a list of bits back to the original data bytestring. """
l = ""
w = 0
rest = 0
bit = 0
for n in data:
if bit+self.baseBit >= 8:
w |= n >> self.baseBit - (8-bit)
bit = self.baseBit - (8-bit)
l += chr(w)
if bit != 0:
w = (n & (1 << bit) - 1) << 8 - bit
else:
w = 0
else:
w |= n << 8-bit-self.baseBit
bit = (bit + self.baseBit) % 8
if bit == 0:
l += chr(w)
w = 0
return l
#def convTo4Bits(self, data):
# l = []
# for c in data:
# n = ord(c)
# lo = n & ((1 << 4)-1)
# hi = (n & (((1 << 4)-1) << 4)) >> 4
# l.extend([hi, lo])
# return l
class SpamGenerator(TextGenerator):
""" De- and encode data in base8 spam text. """
def __init__(self):
TextGenerator.__init__(self, base=8)
self.startList = "greeting"
self.addList("greeting",
{
0: Token("Hi,\n\n", ["start"]),
1: Token("Hey,\n\n", ["start"]),
2: Token("Greetings,\n\n", ["start"]),
3: Token("Dear Mr. or Mrs.,,\n\n", ["start"]),
4: Token("SPECIAL OFFER!\n", ["start"]),
5: Token("High Quality! Read on!\n", ["start"]),
6: Token("Best buy!\n\n", ["start"]),
7: Token("Dear Valued Customer,\n\n", ["start"]),
8: Token("Well, uhm, ", ["start"]),
})
self.addList("start",
{
0: Token("we are happy to ", ["inform_them"]),
1: Token("we are glad to ", ["inform_them"]),
2: Token("we gladly ", ["inform_them"]),
3: Token("it happens that we can ",["inform_them"]),
4: Token("we want to ", ["inform_them"]),
5: Token("today ", ["you_have"]),
6: Token("ITS TRUE! ", ["you_have"]),
7: Token("you won! ", ["you_have"]),
8: Token("awesome for you, buddy! ",["leaving"]),
})
self.addList("inform_them",
{
0: Token("inform you, that ", ["you_have"]),
1: Token("make a remarkt, that ", ["you_have"]),
2: Token("announce, that ", ["you_have"]),
3: Token("celebrate with you! ", ["you_have"]),
4: Token("congratulate you, because ", ["you_have"]),
5: Token("take the extra step: ", ["you_have"]),
6: Token("tell you, that ", ["you_have"]),
7: Token("don't forget about you, ", ["you_have"]),
8: Token("move property! ", ["you_have"]),
})
self.addList("you_have",
{
0: Token("you won ", ["won_item"]),
1: Token("you have won ", ["won_item"]),
2: Token("you aqcuired ", ["won_item"]),
3: Token("one time offer only: ", ["won_item"]),
4: Token("at your account we found ", ["won_item"]),
5: Token("the prince of nigeria offers to you ", ["won_item"]),
6: Token("off shore accounts brought you ", ["won_item"]),
7: Token("insider traging brought you ", ["won_item"]),
8: Token("you managed to get", ["won_item"]),
})
self.addList("won_item",
{
0: Token("a sum of ", ["money_sum"]),
1: Token("the priceless diamond of Zalanda. " , ["claim"]),
2: Token("free viagra! ", ["claim"]),
3: Token("an inheritance of ", ["money_sum"]),
4: Token("the opportunity to make money online! ", ["claim"]),
5: Token("a part of an oil pipe line, worth ", ["money_sum"]),
6: Token("free money - ", ["money_sum"]),
7: Token("a rare antique item worth", ["money_sum"]),
8: Token("quiet a bit o' stuff. ", ["claim"]),
})
self.addList("money_sum",
{
0: Token( "5,000,000 USD. ", ["claim"]),
1: Token("10,000,000 USD. ", ["claim"]),
2: Token( "300,000 USD. ", ["claim"]),
3: Token("13,412,573 USD. ", ["claim"]),
4: Token( "7,555,530 USD. ", ["claim"]),
5: Token( "50,000 USD. ", ["claim"]),
6: Token( "4,500,000 USD. ", ["claim"]),
7: Token("42,000,000 USD. ", ["claim"]),
8: Token("87,000,000 USD. ", ["claim"]),
})
self.addList("claim",
{
0: Token("To claim ", ["claimable_item"]),
1: Token("To get hold ", ["claimable_item"]),
2: Token("To acquire ", ["claimable_item"]),
3: Token("To receive ", ["claimable_item"]),
4: Token("To obtain ", ["claimable_item"]),
5: Token("To gatherh ", ["claimable_item"]),
6: Token("To take ownership ", ["claimable_item"]),
7: Token("To collect ", ["claimable_item"]),
8: Token("To finally get ", ["claimable_item"]),
})
self.addList("claimable_item",
{
0: Token("this item, please send ", ["sendables"]),
1: Token("this stuff, please send ", ["sendables"]),
2: Token("your profit, please send ", ["sendables"]),
3: Token("these assets, please send ", ["sendables"]),
4: Token("this price, please send ", ["sendables"]),
5: Token("your earnings, please send ", ["sendables"]),
6: Token("this top-line profit, please send ", ["sendables"]),
7: Token("this treasure, please send ", ["sendables"]),
8: Token("this your winnings, please send ", ["sendables"]),
})
self.addList("sendables",
{
0: Token("us all your information.\n\n", ["more_stuff", "jibberjabber_start"]),
1: Token("us your account data.\n\n", ["more_stuff", "jibberjabber_start"]),
2: Token("us a transfer-free of 50 USD.\n\n", ["more_stuff", "jibberjabber_start"]),
3: Token("us a list of your passwords.\n\n", ["more_stuff", "jibberjabber_start"]),
4: Token("10 valid TAN Numbers.\n\n", ["more_stuff", "jibberjabber_start"]),
5: Token("us your mothers maiden name.\n\n", ["more_stuff", "jibberjabber_start"]),
6: Token("your birth certificate.\n\n", ["more_stuff", "jibberjabber_start"]),
7: Token("a listing of your incomes.\n\n", ["more_stuff", "jibberjabber_start"]),
8: Token("us your personal information.\n\n", ["jibberjabber_start", "leaving"]),
})
self.addList("more_stuff",
{
0: Token("But wait, there is more! ", ["you_have"]),
1: Token("But that is not all! ", ["you_have"]),
2: Token("And there is even more! ", ["you_have"]),
3: Token("Also ", ["you_have"]),
4: Token("And because you seem to be the luckiest person alive: ", ["you_have"]),
5: Token("And how does this sound: ", ["you_have"]),
6: Token("In addition ", ["you_have"]),
7: Token("But... what is this? ", ["you_have"]),
8: Token("AND! ", ["you_have"]),
})
# loop this. random conversation starter
self.addList("jibberjabber_start",
{
0: Token("Would you ", ["jj_consider"]), # have you <tought> <get/buy> <stuff>
1: Token("Will you ", ["jj_consider"]),
2: Token("Did you ever ", ["jj_consider"]),
3: Token("Maybe you ", ["jj_consider"]),
4: Token("In ", ["jj_times"]), # in <time> there is <stuff>
5: Token("At ", ["jj_times"]),
6: Token("Living in ", ["jj_times"]),
7: Token("Considering ", ["jj_times"]),
8: Token("Everything will be better!", ["leaving"]),
})
self.addList("jj_times",
{
0: Token("times like these ", ["jj_whattodo"]),
1: Token("the age of the internet ", ["jj_whattodo"]),
2: Token("mobile times ", ["jj_whattodo"]),
3: Token("this economic crisis ", ["jj_whattodo"]),
4: Token("the time of globalisation ", ["jj_whattodo"]),
5: Token("the age of the global village ", ["jj_whattodo"]),
6: Token("a world of networks ", ["jj_whattodo"]),
7: Token("times of moral values ", ["jj_whattodo"]),
8: Token("the here and now ", ["jj_whattodo"]),
})
self.addList("jj_consider",
{
0: Token("consider ", ["jj_buyverb"]),
1: Token("think about ", ["jj_buyverb"]),
2: Token("take into account ", ["jj_buyverb"]),
3: Token("have the desire for ", ["jj_buyverb"]),
4: Token("evaluate ", ["jj_buyverb"]),
5: Token("reason about ", ["jj_buyverb"]),
6: Token("keep in mind ", ["jj_buyverb"]),
7: Token("suggest ", ["jj_buyverb"]),
8: Token("imagine ", ["jj_buyverb"]),
})
self.addList("jj_buyverb",
{
0: Token("buying ", ["jj_buynoun"]),
1: Token("obtaining ", ["jj_buynoun"]),
2: Token("purchasing ", ["jj_buynoun"]),
3: Token("posessing ", ["jj_buynoun"]),
4: Token("owning ", ["jj_buynoun"]),
5: Token("creating ", ["jj_buynoun"]),
6: Token("crafting ", ["jj_buynoun"]),
7: Token("receiving ", ["jj_buynoun"]),
8: Token("getting ", ["jj_buynoun"]),
})
self.addList("jj_buynoun",
{
0: Token("a new car? ", ["jibberjabber_start"]),
1: Token("an own house? ", ["jibberjabber_start"]),
2: Token("the women of your dreams? ", ["jibberjabber_start"]),
3: Token("a healthy sexual relationship? ", ["jibberjabber_start"]),
4: Token("an own country? ", ["jibberjabber_start"]),
5: Token("your penis size? ", ["jibberjabber_start"]),
6: Token("free viagra? ", ["jibberjabber_start"]),
7: Token("the newest of apples products? ", ["jibberjabber_start"]),
8: Token("a brand new kitchentable? ", ["jibberjabber_start", "leaving"]),
})
self.addList("jj_whattodo",
{
0: Token("you should always think about ", ["jj_whattodonoun"]),
1: Token("the moral values predict good values for ", ["jj_whattodonoun"]),
2: Token("society will talk about ", ["jj_whattodonoun"]),
3: Token("all your friends will admire ", ["jj_whattodonoun"]),
4: Token("the talk of your social group will be ", ["jj_whattodonoun"]),
5: Token("genderstudies will celebrate ", ["jj_whattodonoun"]),
6: Token("considering everything about ", ["jj_whattodonoun"]),
7: Token("your possibilities are unimaginable regarding ", ["jj_whattodonoun"]),
8: Token("things are looking good regarding", ["jj_whattodonoun"]),
})
self.addList("jj_whattodonoun",
{
0: Token("the stock market. ", ["jibberjabber_start"]),
1: Token("your penis size. ", ["jibberjabber_start"]),
2: Token("how attractive you are to the opposite sex. ", ["jibberjabber_start"]),
3: Token("your investment in foreign oil company funds. ", ["jibberjabber_start"]),
4: Token("mobility options for going into the mobile business. ", ["jibberjabber_start"]),
5: Token("a bottle from our best collection of tasteful wines. ", ["jibberjabber_start"]),
6: Token("buying viagra online NOW! ", ["jibberjabber_start"]),
7: Token("getting more money out of your job! ", ["jibberjabber_start"]),
8: Token("winning money in Las Vegas! ", ["leaving"]),
})
# HACK: At the moment this does not support choosing random words
self.addList("leaving",
{
0: Token(None, [None]),
1: Token(None, [None]),
2: Token(None, [None]),
3: Token(None, [None]),
4: Token(None, [None]),
5: Token(None, [None]),
6: Token(None, [None]),
7: Token(None, [None]),
8: Token("\n\nYours faithfully,\n\n", ["leave_name_%d" % i for i in range(8)]),
})
for i in zip(range(8), ["Ernest Schlempl", "Bernhard Vonneguth", "Maria Peters", "Sibille Harstall", "Richmond Maltitz", "Benno Boch", "Tatjana Horn", "Marcell Hintzenstern"]):
self.addList("leave_name_%d" % i[0], {8: Token(i[1], [None])})
self.addList("m",
{
0: Token("", [None]),
1: Token("", [None]),
2: Token("", [None]),
3: Token("", [None]),
4: Token("", [None]),
5: Token("", [None]),
6: Token("", [None]),
7: Token("", [None]),
8: Token("", [None]),
})
def encode(self, data):
""" Encode data: Traverse wordlists. Return spam-text. """
listBits = self.convToBits(data)
nextList = self.startList
pos = 0
text = ""
while nextList:
bit = 8
if pos < len(listBits):
bit = listBits[pos]
#print "Pos:", pos, "- Entering list", nextList, "- Bit:", bit
l = self.getList(nextList)
idx = pos < len(listBits) and listBits[pos] or self.base
t = l[bit]
text += t.word
nextList = t.nextLists[random.randint(0, len(t.nextLists)-1)]
if bit != 8:
pos += 1
return text
def hexdump(self, data):
return ((len(data)*"%02x ") % tuple(map(lambda x: ord(x), data))).rstrip()
def decode(self, text):
""" Decode spam-text to original data. """
text = text.lstrip().replace("\r\n", "\n")
nextLists = [self.startList]
result = []
while len(text) > 0 and len(nextLists) > 0 and nextLists[0]:
match = False
for listname in nextLists:
(match, bits, token) = self.findInList(text, listname)
if match:
#print "matched value ", bits, "word", token.word
if bits != 8:
result.append(bits)
text = text.replace(token.word, "", 1)
nextLists = token.nextLists
#print "next possible lists are", nextLists
break
if not match:
#print "BASEWTF"
#print nextLists
#print self.getList(nextLists[0])
#print self.hexdump(text)
#print " --------------- "
print "Beginning of text (hex): ", self.hexdump(text[:10])
for l in self.getList(nextLists[0]):
print l, self.getList(nextLists[0])[l], self.hexdump(self.getList(nextLists[0])[l].word)
raise DecodingException("Could not decode text (no more possible lists). Remaining text is \"%s\"" % text)
# print "text remaining", text
convBack = self.convToNums(result)
return convBack
def findInList(self, text, listname):
sList = self.getList(listname)
for key in sList:
#print "\tTesting word: ", sList[key].word
if sList[key].word and (text.startswith(sList[key].word) or \
text.replace("\n", "").startswith(sList[key].word.replace("\n", ""))):
# HACK: Newline matching problem, mail classes add extra newlines
# ==> matching not possible
token = sList[key]
#print "\t==> MATCH in list", listname
return (True, key, token)
#print "\tNO match in list", listname
return (False, -1, None)
def main():
""" Main function, does en- and decoding test for testing purposes. """
data = "\xF2\x51\x92\x61\x9d\x1f\x0F\xb7\xaa\xc1"
for d in data:
print ord(d),
print ""
t = SpamGenerator()
d = t.convToBits(data)
e = t.convToNums(d)
print d
for a in e:
print ord(a),
print ""
msg = t.encode(data)
#print res, "\n"
res = t.decode(msg)
print
print d
for d in data:
print ord(d),
print ""
print msg
if __name__ == '__main__':
main()