summaryrefslogtreecommitdiffstats
path: root/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py
diff options
context:
space:
mode:
Diffstat (limited to 'debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py')
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py154
1 files changed, 154 insertions, 0 deletions
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py
new file mode 100755
index 00000000..b3148c1e
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py
@@ -0,0 +1,154 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Traditional Regular Expression Syntax
+#
+#=======================================================================
+
+from Regexps import *
+from Errors import PlexError
+
+class RegexpSyntaxError(PlexError):
+ pass
+
+def re(s):
+ """
+ Convert traditional string representation of regular expression |s|
+ into Plex representation.
+ """
+ return REParser(s).parse_re()
+
+class REParser:
+
+ def __init__(self, s):
+ self.s = s
+ self.i = -1
+ self.end = 0
+ self.next()
+
+ def parse_re(self):
+ re = self.parse_alt()
+ if not self.end:
+ self.error("Unexpected %s" % repr(self.c))
+ return re
+
+ def parse_alt(self):
+ """Parse a set of alternative regexps."""
+ re = self.parse_seq()
+ if self.c == '|':
+ re_list = [re]
+ while self.c == '|':
+ self.next()
+ re_list.append(self.parse_seq())
+ re = apply(Alt, tuple(re_list))
+ return re
+
+ def parse_seq(self):
+ """Parse a sequence of regexps."""
+ re_list = []
+ while not self.end and not self.c in "|)":
+ re_list.append(self.parse_mod())
+ return apply(Seq, tuple(re_list))
+
+ def parse_mod(self):
+ """Parse a primitive regexp followed by *, +, ? modifiers."""
+ re = self.parse_prim()
+ while not self.end and self.c in "*+?":
+ if self.c == '*':
+ re = Rep(re)
+ elif self.c == '+':
+ re = Rep1(re)
+ else: # self.c == '?'
+ re = Opt(re)
+ self.next()
+ return re
+
+ def parse_prim(self):
+ """Parse a primitive regexp."""
+ c = self.get()
+ if c == '.':
+ re = AnyBut("\n")
+ elif c == '^':
+ re = Bol
+ elif c == '$':
+ re = Eol
+ elif c == '(':
+ re = self.parse_alt()
+ self.expect(')')
+ elif c == '[':
+ re = self.parse_charset()
+ self.expect(']')
+ else:
+ if c == '\\':
+ c = self.get()
+ re = Char(c)
+ return re
+
+ def parse_charset(self):
+ """Parse a charset. Does not include the surrounding []."""
+ char_list = []
+ invert = 0
+ if self.c == '^':
+ invert = 1
+ self.next()
+ if self.c == ']':
+ char_list.append(']')
+ self.next()
+ while not self.end and self.c <> ']':
+ c1 = self.get()
+ if self.c == '-' and self.lookahead(1) <> ']':
+ self.next()
+ c2 = self.get()
+ for a in xrange(ord(c1), ord(c2) + 1):
+ char_list.append(chr(a))
+ else:
+ char_list.append(c1)
+ chars = string.join(char_list, "")
+ if invert:
+ return AnyBut(chars)
+ else:
+ return Any(chars)
+
+ def next(self):
+ """Advance to the next char."""
+ s = self.s
+ i = self.i = self.i + 1
+ if i < len(s):
+ self.c = s[i]
+ else:
+ self.c = ''
+ self.end = 1
+
+ def get(self):
+ if self.end:
+ self.error("Premature end of string")
+ c = self.c
+ self.next()
+ return c
+
+ def lookahead(self, n):
+ """Look ahead n chars."""
+ j = self.i + n
+ if j < len(self.s):
+ return self.s[j]
+ else:
+ return ''
+
+ def expect(self, c):
+ """
+ Expect to find character |c| at current position.
+ Raises an exception otherwise.
+ """
+ if self.c == c:
+ self.next()
+ else:
+ self.error("Missing %s" % repr(c))
+
+ def error(self, mess):
+ """Raise exception to signal syntax error in regexp."""
+ raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
+ repr(self.s), self.i, mess))
+
+
+