Optimization of encoder/decoder

This commit is contained in:
Sebastian Lohff 2011-11-17 23:21:17 +01:00
parent 5f5e9e8327
commit b9a96f6bc4
1 changed file with 49 additions and 32 deletions

View File

@ -5,7 +5,10 @@ Provides classes to encode text to arbitrary bases (base 2^n supported) and
then to textual forms. """ then to textual forms. """
import math import math
import random from random import randint, randrange
import sys
import time
from collections import deque
class DecodingException(Exception): class DecodingException(Exception):
pass pass
@ -348,19 +351,22 @@ class SpamGenerator(TextGenerator):
def encode(self, data): def encode(self, data):
""" Encode data: Traverse wordlists. Return spam-text. """ """ Encode data: Traverse wordlists. Return spam-text. """
listBits = self.convToBits(data) listBits = self.convToBits(data)
listBitsLen = len(listBits)
nextList = self.startList nextList = self.startList
pos = 0 pos = 0
text = "" text = ""
# for performance!
getList = self.getList
while nextList: while nextList:
bit = 8 bit = 8
if pos < len(listBits): if pos < listBitsLen:
bit = listBits[pos] bit = listBits[pos]
#print "Pos:", pos, "- Entering list", nextList, "- Bit:", bit l = getList(nextList)
l = self.getList(nextList) idx = pos < listBitsLen and listBits[pos] or self.base
idx = pos < len(listBits) and listBits[pos] or self.base tok = l[bit]
t = l[bit] #text.append(tok.word)
text += t.word text += tok.word
nextList = t.nextLists[random.randint(0, len(t.nextLists)-1)] nextList = tok.nextLists[randrange(0, len(tok.nextLists))]
if bit != 8: if bit != 8:
pos += 1 pos += 1
return text return text
@ -372,11 +378,12 @@ class SpamGenerator(TextGenerator):
""" Decode spam-text to original data. """ """ Decode spam-text to original data. """
text = text.lstrip().replace("\r\n", "\n") text = text.lstrip().replace("\r\n", "\n")
nextLists = [self.startList] nextLists = [self.startList]
result = [] result = deque()
while len(text) > 0 and len(nextLists) > 0 and nextLists[0]: findInList = self.findInList
while text != "" and len(nextLists) > 0 and nextLists[0]:
match = False match = False
for listname in nextLists: for listname in nextLists:
(match, bits, token) = self.findInList(text, listname) (match, bits, token) = findInList(text, listname)
if match: if match:
#print "matched value ", bits, "word", token.word #print "matched value ", bits, "word", token.word
if bits != 8: if bits != 8:
@ -403,8 +410,10 @@ class SpamGenerator(TextGenerator):
sList = self.getList(listname) sList = self.getList(listname)
for key in sList: for key in sList:
#print "\tTesting word: ", sList[key].word #print "\tTesting word: ", sList[key].word
if sList[key].word and (text.startswith(sList[key].word) or \ w = sList[key].word
text.replace("\n", "").startswith(sList[key].word.replace("\n", ""))): if w and (text.startswith(w)):
#or \
#text.replace("\n", "", 1).startswith(sList[key].word.replace("\n", "", 1))):
# HACK: Newline matching problem, mail classes add extra newlines # HACK: Newline matching problem, mail classes add extra newlines
# ==> matching not possible # ==> matching not possible
token = sList[key] token = sList[key]
@ -415,26 +424,34 @@ class SpamGenerator(TextGenerator):
def main(): def main():
""" Main function, does en- and decoding test for testing purposes. """ """ Main function, does en- and decoding test for testing purposes. """
data = "\xF2\x51\x92\x61\x9d\x1f\x0F\xb7\xaa\xc1" # data = "\xF2\x51\x92\x61\x9d\x1f\x0F\xb7\xaa\xc1"
for d in data: #data = "".join(sys.argv[1:])
print ord(d), #for d in data:
print "" # print ord(d),
#print ""
#t = SpamGenerator()
#d = t.convToBits(data)
#e = t.convToNums(d)
#print d
#for a in e:
# print ord(a),
#print ""
#msg = t.encode(data)
##print res, "\n"
#res = t.decode(msg)
#print
#print d
#for d in data:
# print ord(d),
#print ""
#print msg
t = SpamGenerator() t = SpamGenerator()
d = t.convToBits(data) #data = "".join([chr(randint(0, 255)) for i in range(1500)])
e = t.convToNums(d) #t.encode(data)
print d #return
for a in e: msg = open("file.txt", "r").read()
print ord(a), t.decode(msg)
print "" print t.encode("Hallo ihr beiden")
msg = t.encode(data)
#print res, "\n"
res = t.decode(msg)
print
print d
for d in data:
print ord(d),
print ""
print msg
if __name__ == '__main__': if __name__ == '__main__':
main() main()