For my project, I am trying to implement a small part of the BitTorrent protocol, which can be found here. In particular, I want to use the Bencoding part, which is a way to safely encode data for transmission over a socket. The format is as follows:
8:a string => "a string"
i1234e => 1234
l1:a1:be => ['a', 'b']
d1:a1:b3:one3:twoe => {'a':'b', 'one':'two'}
The encoding part was quite simple, but decoding has become quite troublesome. For example, if I have a list of strings, I have no way to split it into the individual strings. I tried several different solutions, including PyParsing and a custom parser. I'm currently trying to use regular expressions, and it seems to be going fairly well, but I'm still stuck on the string problem. My current regex is:
(?P<length>\d+):(?P<contents>.{\1})
However, I cannot use the first group as the length of the second group. Is there a good way to do this? Or am I approaching this all wrong and the answer is sitting right in front of me?
, , (.. ), , , . .
, , , .
, .
, :
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3
import string

inputs = ["10:a stringly", "i1234e", "l1:a1:be", "d1:a1:b3:one3:twoe"]

# Constants
DICT_TYPE = 'd'
LIST_TYPE = 'l'
INT_TYPE = 'i'
TOKEN_EOF = ''
TOKEN_END = 'e'
COLON = ':'


class BadTypeIndicatorException(ValueError):
    """Raised when a value starts with a character that is not a known type.

    Derives from ValueError so that callers which caught the ValueError
    previously produced by empty input keep working.
    """


def read_int(stream):
    """Read characters up to (and consuming) the next ':' or 'e', or to EOF.

    Returns the characters read, as a string; the terminator is not
    included.  The caller converts the result with int().
    """
    chars = []
    while True:
        ch = stream.read(1)
        if ch in (TOKEN_EOF, TOKEN_END, COLON):
            break
        chars.append(ch)
    return "".join(chars)


def tokenize(stream):
    """Yield each decoded element of a list/dict body.

    Assumes the opening 'l' or 'd' has already been consumed.  Stops after
    consuming the closing 'e'; end of input is also treated as the end of
    the container.  Elements may be strings, integers, lists or
    dictionaries.
    """
    while True:
        ch = stream.read(1)
        if ch == TOKEN_END or ch == TOKEN_EOF:
            return
        yield _parse_value(stream, ch)


def _parse_value(stream, type_char):
    """Decode one value whose first character, type_char, is already read."""
    # '' is "in" every string, so EOF must be rejected before the digit test.
    if type_char == TOKEN_EOF:
        raise BadTypeIndicatorException("unexpected end of input")
    if type_char in string.digits:
        # Length-prefixed string: the remaining digits run up to the ':'.
        length = int(type_char + read_int(stream))
        return stream.read(length)
    # Compare by value: identity ('is') only works by accident of interning.
    if type_char == INT_TYPE:
        return int(read_int(stream))
    if type_char == LIST_TYPE:
        return list(tokenize(stream))
    if type_char == DICT_TYPE:
        # A dictionary body is a flat key, value, key, value, ... sequence.
        tokens = list(tokenize(stream))
        return dict(zip(tokens[0::2], tokens[1::2]))
    raise BadTypeIndicatorException(
        "unknown type indicator: %r" % (type_char,))


def parse(stream):
    """Decode one bencoded value from a file-like object.

    stream must support read(n).  Returns a string, an int, a list or a
    dict, with containers nested as deep as the input requires.  Raises
    BadTypeIndicatorException when the input is empty or its first
    character is not a digit, 'i', 'l' or 'd'.
    """
    return _parse_value(stream, stream.read(1))


if __name__ == "__main__":
    for sample in inputs:
        print(parse(StringIO(sample)))
, . , . , .
, python, # :
string regex = "^[A-Za-z0-9_]{1," + length + "}$"
1 no , alpanumeric _, , .
, :)
. , . :
def read_string(stream):
    """Split a leading length-prefixed string off the front of stream.

    stream is an indexable string of the form "<length>:<data>...".
    Returns a (data, remainder) pair, where remainder is everything after
    the string's data.  Raises ValueError when there is no ':' or the
    length prefix is not an integer.
    """
    colon = stream.index(':')
    start = colon + 1
    end = start + int(stream[:colon])
    return stream[start:end], stream[end:]
def read_list(stream):
    """Decode the list at the front of stream and return its elements.

    The first character (the 'l' marker) is skipped, then read_object is
    called repeatedly until the next unread character is 'e'.  Raises
    IndexError if the input runs out before an 'e' is seen.

    NOTE(review): only the element list is returned; the text remaining
    after the closing 'e' is computed and then discarded, unlike
    read_string, which returns (value, remainder).  Confirm this is what
    read_object expects.
    """
    items = []
    remaining = stream[1:]  # drop the leading 'l'
    while remaining[0] != 'e':
        element, remaining = read_object(remaining)
        items.append(element)
    remaining = remaining[1:]  # step past the closing 'e' (value unused)
    return items
read_object, .
... - , , , .
bdecoding ( bencoding) PERL, , .
, ( [oops]):
. ( ) "-" ( , , -, int ).
i
, ... l d, l/d, , . , e, , .
l
d
e
, , , DESTRUCTIVE. , , , - , , , , , ( : , , ). , , - , .
:
# Every reader below is destructive: it consumes what it decodes from stream.

# Read an optionally negative decimal integer.  The first non-digit character
# after the digits (the ':' of a string or the 'e' of an integer) is consumed.
define read-integer (stream):
    let number 0, sign 1:
        if string-equal ('-', (c <- read-char (stream))):
            sign <- -1
        else:
            number <- parse-integer (c)
        while number? (c <- read-char (stream)):
            number <- (number * 10) + parse-integer (c)
        return sign * number

# "<count>:<chars>" -- read-integer has already consumed the ':'.
define bdecode-string (stream):
    let count read-integer (stream):
        return read-n-chars (stream, count)

# "i<digits>e" -- skip the 'i'; read-integer consumes the closing 'e'.
define bdecode-integer (stream):
    ignore read-char (stream)
    return read-integer (stream)

# "l<items>e" (also used for "d<items>e") -- skip the marker, decode items
# until 'e', then consume that 'e' so an enclosing list can keep reading.
define bdecode-list (stream):
    ignore read-char (stream)
    let list []:
        while not string-equal ('e', peek-char (stream)):
            append (list, bdecode (stream))
        ignore read-char (stream)
        return list

# A dictionary is decoded as a list, then converted with dictionarify.
define bdecode-dictionary (stream):
    let list bdecode-list (stream):
        return dictionarify (list)

# Dispatch on the next character without consuming it.
define bdecode (stream):
    case peek-char (stream):
        number? => bdecode-string (stream)
        'i' => bdecode-integer (stream)
        'l' => bdecode-list (stream)
        'd' => bdecode-dictionary (stream)