diff options
Diffstat (limited to 'debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py')
-rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py new file mode 100755 index 00000000..b3148c1e --- /dev/null +++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py @@ -0,0 +1,154 @@ +#======================================================================= +# +# Python Lexical Analyser +# +# Traditional Regular Expression Syntax +# +#======================================================================= + +from Regexps import * +from Errors import PlexError + +class RegexpSyntaxError(PlexError): + pass + +def re(s): + """ + Convert traditional string representation of regular expression |s| + into Plex representation. + """ + return REParser(s).parse_re() + +class REParser: + + def __init__(self, s): + self.s = s + self.i = -1 + self.end = 0 + self.next() + + def parse_re(self): + re = self.parse_alt() + if not self.end: + self.error("Unexpected %s" % repr(self.c)) + return re + + def parse_alt(self): + """Parse a set of alternative regexps.""" + re = self.parse_seq() + if self.c == '|': + re_list = [re] + while self.c == '|': + self.next() + re_list.append(self.parse_seq()) + re = apply(Alt, tuple(re_list)) + return re + + def parse_seq(self): + """Parse a sequence of regexps.""" + re_list = [] + while not self.end and not self.c in "|)": + re_list.append(self.parse_mod()) + return apply(Seq, tuple(re_list)) + + def parse_mod(self): + """Parse a primitive regexp followed by *, +, ? modifiers.""" + re = self.parse_prim() + while not self.end and self.c in "*+?": + if self.c == '*': + re = Rep(re) + elif self.c == '+': + re = Rep1(re) + else: # self.c == '?' + re = Opt(re) + self.next() + return re + + def parse_prim(self): + """Parse a primitive regexp.""" + c = self.get() + if c == '.': + re = AnyBut("\n") + elif c == '^': + re = Bol + elif c == '$': + re = Eol + elif c == '(': + re = self.parse_alt() + self.expect(')') + elif c == '[': + re = self.parse_charset() + self.expect(']') + else: + if c == '\\': + c = self.get() + re = Char(c) + return re + + def parse_charset(self): + """Parse a charset. Does not include the surrounding [].""" + char_list = [] + invert = 0 + if self.c == '^': + invert = 1 + self.next() + if self.c == ']': + char_list.append(']') + self.next() + while not self.end and self.c <> ']': + c1 = self.get() + if self.c == '-' and self.lookahead(1) <> ']': + self.next() + c2 = self.get() + for a in xrange(ord(c1), ord(c2) + 1): + char_list.append(chr(a)) + else: + char_list.append(c1) + chars = string.join(char_list, "") + if invert: + return AnyBut(chars) + else: + return Any(chars) + + def next(self): + """Advance to the next char.""" + s = self.s + i = self.i = self.i + 1 + if i < len(s): + self.c = s[i] + else: + self.c = '' + self.end = 1 + + def get(self): + if self.end: + self.error("Premature end of string") + c = self.c + self.next() + return c + + def lookahead(self, n): + """Look ahead n chars.""" + j = self.i + n + if j < len(self.s): + return self.s[j] + else: + return '' + + def expect(self, c): + """ + Expect to find character |c| at current position. + Raises an exception otherwise. + """ + if self.c == c: + self.next() + else: + self.error("Missing %s" % repr(c)) + + def error(self, mess): + """Raise exception to signal syntax error in regexp.""" + raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % ( + repr(self.s), self.i, mess)) + + + |