summaryrefslogtreecommitdiffstats
path: root/debian/pyrex/pyrex-0.9.9/Pyrex/Plex
diff options
context:
space:
mode:
authorSlávek Banko <slavek.banko@axis.cz>2021-03-26 13:52:33 +0100
committerSlávek Banko <slavek.banko@axis.cz>2021-03-26 13:52:33 +0100
commit0f27805eedcc40ae34009aa31a4dc08cb949f867 (patch)
tree8b1c8995d7fdab97acde4bd7c63f96d378c34d02 /debian/pyrex/pyrex-0.9.9/Pyrex/Plex
parentbad411472a12b93f8bfca6b7ca52d89488a8d8ce (diff)
downloadextra-dependencies-0f27805e.tar.gz
extra-dependencies-0f27805e.zip
DEB pyrex: Added to repository.
Signed-off-by: Slávek Banko <slavek.banko@axis.cz>
Diffstat (limited to 'debian/pyrex/pyrex-0.9.9/Pyrex/Plex')
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/Actions.py109
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/DFA.py156
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/Errors.py52
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/Lexicons.py192
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/Machines.py326
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/Regexps.py557
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/Scanners.py377
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/Timing.py22
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py154
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/Transitions.py253
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/__init__.py40
-rwxr-xr-xdebian/pyrex/pyrex-0.9.9/Pyrex/Plex/test_tm.py24
12 files changed, 2262 insertions, 0 deletions
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Actions.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Actions.py
new file mode 100755
index 00000000..23253a90
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Actions.py
@@ -0,0 +1,109 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Actions for use in token specifications
+#
+#=======================================================================
+
class Action:
    """Base class for Plex actions.

    Two actions are considered the same only when they are the very
    same object, unless a subclass overrides same_as().
    """

    def same_as(self, other):
        return other is self


class Return(Action):
    """
    Internal Plex action which causes |value| to
    be returned as the value of the associated token
    """

    value = None  # the constant value returned for every match

    def __init__(self, value):
        self.value = value

    def perform(self, token_stream, text):
        # The scanner and matched text play no part; the stored
        # value is always the token value.
        return self.value

    def same_as(self, other):
        if not isinstance(other, Return):
            return False
        return self.value == other.value

    def __repr__(self):
        return "Return(%s)" % repr(self.value)


class Call(Action):
    """
    Internal Plex action which causes a function to be called.
    """

    function = None  # callable taking (scanner, matched_text)

    def __init__(self, function):
        self.function = function

    def perform(self, token_stream, text):
        # The wrapped function's return value becomes the token value.
        return self.function(token_stream, text)

    def __repr__(self):
        return "Call(%s)" % self.function.__name__

    def same_as(self, other):
        if not isinstance(other, Call):
            return False
        return other.function is self.function


class Begin(Action):
    """
    Begin(state_name) is a Plex action which causes the Scanner to
    enter the state |state_name|. See the docstring of Plex.Lexicon
    for more information.
    """

    state_name = None  # name of the scanner state to switch into

    def __init__(self, state_name):
        self.state_name = state_name

    def perform(self, token_stream, text):
        # Side effect only: switch the scanner's state; no token value.
        token_stream.begin(self.state_name)

    def __repr__(self):
        return "Begin(%s)" % self.state_name

    def same_as(self, other):
        if not isinstance(other, Begin):
            return False
        return other.state_name == self.state_name


class Ignore(Action):
    """
    IGNORE is a Plex action which causes its associated token
    to be ignored. See the docstring of Plex.Lexicon for more
    information.
    """

    def perform(self, token_stream, text):
        # A None token value makes the scanner keep scanning.
        return None

    def __repr__(self):
        return "IGNORE"

# Shared singleton used in lexicon specifications.
IGNORE = Ignore()
IGNORE.__doc__ = Ignore.__doc__


class Text(Action):
    """
    TEXT is a Plex action which causes the text of a token to
    be returned as the value of the token. See the docstring of
    Plex.Lexicon for more information.
    """

    def perform(self, token_stream, text):
        return text

    def __repr__(self):
        return "TEXT"

# Shared singleton used in lexicon specifications.
TEXT = Text()
TEXT.__doc__ = Text.__doc__
+
+
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/DFA.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/DFA.py
new file mode 100755
index 00000000..2c0004b0
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/DFA.py
@@ -0,0 +1,156 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Converting NFA to DFA
+#
+#=======================================================================
+
+import Machines
+from Machines import LOWEST_PRIORITY
+from Transitions import TransitionMap
+
def nfa_to_dfa(old_machine, debug = None):
    """
    Given a nondeterministic Machine, return a new equivalent
    Machine which is deterministic.

    |old_machine| is a Machines.Machine (NFA).
    |debug| is an optional writable file-like object; when supplied,
    the old-state to new-state mapping is dumped to it.
    Returns a Machines.FastMachine.
    """
    # We build a new machine whose states correspond to sets of states
    # in the old machine. Initially we add a new state corresponding to
    # the epsilon-closure of each initial old state. Then we give transitions
    # to each new state which are the union of all transitions out of any
    # of the corresponding old states. The new state reached on a given
    # character is the one corresponding to the set of states reachable
    # on that character from any of the old states. As new combinations of
    # old states are created, new states are added as needed until closure
    # is reached.
    new_machine = Machines.FastMachine()
    state_map = StateMap(new_machine)
    # Seed the process using the initial states of the old machine.
    # Make the corresponding new states into initial states of the new
    # machine with the same names.
    for (key, old_state) in old_machine.initial_states.items():
        new_state = state_map.old_to_new(epsilon_closure(old_state))
        new_machine.make_initial_state(key, new_state)
    # Tricky bit here: we add things to the end of this list while we're
    # iterating over it. The iteration stops when closure is achieved.
    for new_state in new_machine.states:
        transitions = TransitionMap()
        for old_state in state_map.new_to_old(new_state).keys():
            for event, old_target_states in old_state.transitions.items():
                # Skip epsilon moves ('' event) and empty target sets;
                # epsilon reachability is folded in via the closure below.
                if event and old_target_states:
                    transitions.add_set(event, set_epsilon_closure(old_target_states))
        for event, old_states in transitions.items():
            new_machine.add_transitions(new_state, event, state_map.old_to_new(old_states))
    if debug:
        debug.write("\n===== State Mapping =====\n")
        state_map.dump(debug)
    return new_machine
+
def set_epsilon_closure(state_set):
    """
    Given a set of states, return the union of the epsilon
    closures of its member states.
    """
    # Sets are represented as dicts mapping state -> 1 throughout
    # this module.
    union = {}
    for member in state_set.keys():
        for reachable in epsilon_closure(member).keys():
            union[reachable] = 1
    return union

def epsilon_closure(state):
    """
    Return the set of states reachable from the given state
    by epsilon moves.
    """
    # The closure is cached on the state itself after the first call.
    cached = state.epsilon_closure
    if cached is not None:
        return cached
    closure = {}
    # Install the cache before recursing so cycles terminate.
    state.epsilon_closure = closure
    add_to_epsilon_closure(closure, state)
    return closure

def add_to_epsilon_closure(state_set, state):
    """
    Recursively add to |state_set| states reachable from the given state
    by epsilon moves.
    """
    if state_set.get(state, 0):
        return  # already visited
    state_set[state] = 1
    epsilon_targets = state.transitions.get_epsilon()
    if epsilon_targets:
        for target in epsilon_targets.keys():
            add_to_epsilon_closure(state_set, target)
+
class StateMap:
    """
    Helper class used by nfa_to_dfa() to map back and forth between
    sets of states from the old machine and states of the new machine.
    """
    new_machine = None      # Machine
    old_to_new_dict = None  # {(old_state,...) : new_state}
    new_to_old_dict = None  # {id(new_state) : old_state_set}

    def __init__(self, new_machine):
        self.new_machine = new_machine
        self.old_to_new_dict = {}
        self.new_to_old_dict = {}

    def old_to_new(self, old_state_set):
        """
        Return the state of the new machine corresponding to the
        set of old machine states represented by |state_set|. A new
        state will be created if necessary. If any of the old states
        are accepting states, the new state will be an accepting state
        with the highest priority action from the old states.
        """
        key = self.make_key(old_state_set)
        new_state = self.old_to_new_dict.get(key, None)
        if not new_state:
            action = self.highest_priority_action(old_state_set)
            new_state = self.new_machine.new_state(action)
            self.old_to_new_dict[key] = new_state
            self.new_to_old_dict[id(new_state)] = old_state_set
            #for old_state in old_state_set.keys():
                #new_state.merge_actions(old_state)
        return new_state

    def highest_priority_action(self, state_set):
        # Scan the member states for the action with the numerically
        # greatest priority; LOWEST_PRIORITY loses to everything.
        best_action = None
        best_priority = LOWEST_PRIORITY
        for state in state_set.keys():
            priority = state.action_priority
            if priority > best_priority:
                best_action = state.action
                best_priority = priority
        return best_action

#   def old_to_new_set(self, old_state_set):
#       """
#       Return the new state corresponding to a set of old states as
#       a singleton set.
#       """
#       return {self.old_to_new(old_state_set):1}

    def new_to_old(self, new_state):
        """Given a new state, return a set of corresponding old states."""
        return self.new_to_old_dict[id(new_state)]

    def make_key(self, state_set):
        """
        Convert a set of states into a uniquified
        sorted tuple suitable for use as a dictionary key.
        """
        # NOTE: Python 2 idiom -- dict.keys() returns a list here,
        # which is then sorted in place.
        lst = state_set.keys()
        lst.sort()
        return tuple(lst)

    def dump(self, file):
        from Transitions import state_set_str
        for new_state in self.new_machine.states:
            old_state_set = self.new_to_old_dict[id(new_state)]
            file.write(" State %s <-- %s\n" % (
                new_state['number'], state_set_str(old_state_set)))
+
+
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Errors.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Errors.py
new file mode 100755
index 00000000..ae033672
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Errors.py
@@ -0,0 +1,52 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Exception classes
+#
+#=======================================================================
+
+import exceptions
+
class PlexError(exceptions.Exception):
    """Base class for all exceptions raised by Plex."""
    message = ""

class PlexTypeError(PlexError, TypeError):
    """Plex counterpart of TypeError."""
    pass

class PlexValueError(PlexError, ValueError):
    """Plex counterpart of ValueError."""
    pass

class InvalidRegex(PlexError):
    """Raised when a malformed regular expression is encountered."""
    pass

class InvalidToken(PlexError):
    """Raised for a bad token definition; the message is prefixed
    with the offending token's number."""

    def __init__(self, token_number, message):
        PlexError.__init__(self, "Token number %d: %s" % (token_number, message))

class InvalidScanner(PlexError):
    """Raised when a scanner specification is malformed."""
    pass

class AmbiguousAction(PlexError):
    """Raised when two tokens with different actions can match the
    same string; the class-level message is the whole report."""
    message = "Two tokens with different actions can match the same string"

    def __init__(self):
        pass

class UnrecognizedInput(PlexError):
    """Raised by the scanner when no token matches the input."""
    scanner = None     # the Scanner that failed
    position = None    # scanner.position() result; the __str__ format
                       # implies a (name, line, char) tuple -- confirm
    state_name = None  # name of the scanner state at the time

    def __init__(self, scanner, state_name):
        self.scanner = scanner
        self.position = scanner.position()
        self.state_name = state_name

    def __str__(self):
        return ("'%s', line %d, char %d: Token not recognised in state %s"
            % (self.position + (repr(self.state_name),)))
+
+
+
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Lexicons.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Lexicons.py
new file mode 100755
index 00000000..32b12c43
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Lexicons.py
@@ -0,0 +1,192 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Lexical Analyser Specification
+#
+#=======================================================================
+
+import types
+
+import Actions
+import DFA
+import Errors
+import Machines
+import Regexps
+
+# debug_flags for Lexicon constructor
+DUMP_NFA = 1
+DUMP_DFA = 2
+
class State:
    """
    This class is used as part of a Plex.Lexicon specification to
    introduce a user-defined state.

    Constructor:

       State(name, token_specifications)
    """

    name = None    # string naming the state
    tokens = None  # list of (pattern, action) token definitions

    def __init__(self, name, tokens):
        self.tokens = tokens
        self.name = name
+
class Lexicon:
    """
    Lexicon(specification) builds a lexical analyser from the given
    |specification|. The specification consists of a list of
    specification items. Each specification item may be either:

       1) A token definition, which is a tuple:

             (pattern, action)

          The |pattern| is a regular expression built using the
          constructors defined in the Plex module.

          The |action| is the action to be performed when this pattern
          is recognised (see below).

       2) A state definition:

             State(name, tokens)

          where |name| is a character string naming the state,
          and |tokens| is a list of token definitions as
          above. The meaning and usage of states is described
          below.

    Actions
    -------

    The |action| in a token specification may be one of three things:

       1) A function, which is called as follows:

             function(scanner, text)

          where |scanner| is the relevant Scanner instance, and |text|
          is the matched text. If the function returns anything
          other than None, that value is returned as the value of the
          token. If it returns None, scanning continues as if the IGNORE
          action were specified (see below).

       2) One of the following special actions:

          IGNORE means that the recognised characters will be treated as
                 white space and ignored. Scanning will continue until
                 the next non-ignored token is recognised before returning.

          TEXT   causes the scanned text itself to be returned as the
                 value of the token.

       3) Any other value, which is returned as the value of the token.

    States
    ------

    At any given time, the scanner is in one of a number of states.
    Associated with each state is a set of possible tokens. When scanning,
    only tokens associated with the current state are recognised.

    There is a default state, whose name is the empty string. Token
    definitions which are not inside any State definition belong to
    the default state.

    The initial state of the scanner is the default state. The state can
    be changed in one of two ways:

       1) Using Begin(state_name) as the action of a token.

       2) Calling the begin(state_name) method of the Scanner.

    To change back to the default state, use '' as the state name.
    """

    machine = None  # Machines.FastMachine (the compiled DFA)
    tables = None   # StateTableMachine (unused here)

    def __init__(self, specifications, debug = None, debug_flags = 7, timings = None):
        # NOTE: Python 2 code -- uses the <> operator and types.ListType.
        if type(specifications) <> types.ListType:
            raise Errors.InvalidScanner("Scanner definition is not a list")
        if timings:
            from Timing import time
            total_time = 0.0
            time1 = time()
        # Phase 1: build an NFA from the specification items.
        nfa = Machines.Machine()
        default_initial_state = nfa.new_initial_state('')
        token_number = 1
        for spec in specifications:
            if isinstance(spec, State):
                # Tokens inside a State definition hang off their own
                # initial state, named after the State.
                user_initial_state = nfa.new_initial_state(spec.name)
                for token in spec.tokens:
                    self.add_token_to_machine(
                        nfa, user_initial_state, token, token_number)
                    token_number = token_number + 1
            elif type(spec) == types.TupleType:
                self.add_token_to_machine(
                    nfa, default_initial_state, spec, token_number)
                token_number = token_number + 1
            else:
                raise Errors.InvalidToken(
                    token_number,
                    "Expected a token definition (tuple) or State instance")
        if timings:
            time2 = time()
            total_time = total_time + (time2 - time1)
            time3 = time()
        if debug and (debug_flags & 1):
            debug.write("\n============= NFA ===========\n")
            nfa.dump(debug)
        # Phase 2: convert the NFA to a DFA for fast scanning. The
        # mapping dump is only produced when both debug bits are set.
        dfa = DFA.nfa_to_dfa(nfa, debug = (debug_flags & 3) == 3 and debug)
        if timings:
            time4 = time()
            total_time = total_time + (time4 - time3)
        if debug and (debug_flags & 2):
            debug.write("\n============= DFA ===========\n")
            dfa.dump(debug)
        if timings:
            timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
            timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
            timings.write("TOTAL : %5.2f\n" % total_time)
        self.machine = dfa

    def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
        """Compile one (pattern, action) definition into |machine|,
        reachable from |initial_state|."""
        try:
            (re, action_spec) = self.parse_token_definition(token_spec)
            # Disabled this -- matching empty strings can be useful
            #if re.nullable:
            #  raise Errors.InvalidToken(
            #    token_number, "Pattern can match 0 input symbols")
            if isinstance(action_spec, Actions.Action):
                action = action_spec
            elif callable(action_spec):
                action = Actions.Call(action_spec)
            else:
                action = Actions.Return(action_spec)
            final_state = machine.new_state()
            re.build_machine(machine, initial_state, final_state,
                             match_bol = 1, nocase = 0)
            # Earlier tokens win ties: priority is the negated token number.
            final_state.set_action(action, priority = -token_number)
        except Errors.PlexError, e:
            raise e.__class__("Token number %d: %s" % (token_number, e))

    def parse_token_definition(self, token_spec):
        """Validate a token definition and split it into (pattern, action)."""
        # NOTE(review): Errors.InvalidToken.__init__ takes
        # (token_number, message); these one-argument calls would raise
        # TypeError if ever reached -- confirm against Errors.py.
        if type(token_spec) <> types.TupleType:
            raise Errors.InvalidToken("Token definition is not a tuple")
        if len(token_spec) <> 2:
            raise Errors.InvalidToken("Wrong number of items in token definition")
        pattern, action = token_spec
        if not isinstance(pattern, Regexps.RE):
            raise Errors.InvalidToken("Pattern is not an RE instance")
        return (pattern, action)

    def get_initial_state(self, name):
        """Return the DFA initial state for scanner state |name|."""
        return self.machine.get_initial_state(name)
+
+
+
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Machines.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Machines.py
new file mode 100755
index 00000000..fb9ba717
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Machines.py
@@ -0,0 +1,326 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Classes for building NFAs and DFAs
+#
+#=======================================================================
+
+import string
+import sys
+from sys import maxint
+from types import TupleType
+
+from Transitions import TransitionMap
+
+LOWEST_PRIORITY = -sys.maxint
+
class Machine:
    """A collection of Nodes representing an NFA or DFA."""

    states = None            # list of Node
    next_state_number = 1    # number given to the next state created
    initial_states = None    # {name: Node}

    def __init__(self):
        self.states = []
        self.initial_states = {}

    def __del__(self):
        # Break reference cycles between states on teardown.
        for node in self.states:
            node.destroy()

    def new_state(self):
        """Add a new state to the machine and return it."""
        node = Node()
        node.number = self.next_state_number
        self.next_state_number = node.number + 1
        self.states.append(node)
        return node

    def new_initial_state(self, name):
        """Create a new state and register it as initial state |name|."""
        node = self.new_state()
        self.make_initial_state(name, node)
        return node

    def make_initial_state(self, name, state):
        self.initial_states[name] = state

    def get_initial_state(self, name):
        return self.initial_states[name]

    def dump(self, file):
        """Write a human-readable description of the machine to |file|."""
        file.write("Plex.Machine:\n")
        if self.initial_states is not None:
            file.write(" Initial states:\n")
            for (name, state) in self.initial_states.items():
                file.write(" '%s': %d\n" % (name, state.number))
        for node in self.states:
            node.dump(file)
+
class Node:
    """A state of an NFA or DFA."""

    transitions = None      # TransitionMap
    action = None           # Action
    action_priority = None  # integer
    number = 0              # for debug output
    epsilon_closure = None  # used by nfa_to_dfa()

    def __init__(self):
        # Preinitialise the list of empty transitions, because
        # the nfa-to-dfa algorithm needs it
        self.transitions = TransitionMap()
        self.action_priority = LOWEST_PRIORITY

    def destroy(self):
        # Drop references so cyclic structures can be collected.
        self.transitions = None
        self.action = None
        self.epsilon_closure = None

    def add_transition(self, event, new_state):
        self.transitions.add(event, new_state)

    def link_to(self, state):
        """Add an epsilon-move from this state to another state."""
        self.add_transition('', state)

    def set_action(self, action, priority):
        """Make this an accepting state with the given action. If
        there is already an action, choose the action with highest
        priority."""
        if priority <= self.action_priority:
            return
        self.action = action
        self.action_priority = priority

    def get_action(self):
        return self.action

    def get_action_priority(self):
        return self.action_priority

    def is_accepting(self):
        return self.action is not None

    def __str__(self):
        return "State %d" % self.number

    def dump(self, file):
        import string
        # Header
        file.write(" State %d:\n" % self.number)
        # Transitions
        self.transitions.dump(file)
        # Action
        action = self.action
        priority = self.action_priority
        if action is not None:
            file.write(" %s [priority %d]\n" % (action, priority))
+
+
class FastMachine:
    """
    FastMachine is a deterministic machine represented in a way that
    allows fast scanning.
    """
    initial_states = None  # {state_name:state}
    states = None          # [state]
    # where state = {event:state, 'else':state, 'action':Action}
    next_number = 1        # for debugging

    # Every new state starts with these event slots present.
    new_state_template = {
        '':None, 'bol':None, 'eol':None, 'eof':None, 'else':None
    }

    def __init__(self, old_machine = None):
        self.initial_states = initial_states = {}
        self.states = []
        if old_machine:
            # Copy-construct from an existing deterministic Machine:
            # create a new state per old state, then carry across the
            # initial-state entries, transitions and actions.
            self.old_to_new = old_to_new = {}
            for old_state in old_machine.states:
                new_state = self.new_state()
                old_to_new[old_state] = new_state
            for name, old_state in old_machine.initial_states.items():
                initial_states[name] = old_to_new[old_state]
            for old_state in old_machine.states:
                new_state = old_to_new[old_state]
                for event, old_state_set in old_state.transitions.items():
                    if old_state_set:
                        # Deterministic input: each target set holds
                        # exactly one state (Python 2 keys() list).
                        new_state[event] = old_to_new[old_state_set.keys()[0]]
                    else:
                        new_state[event] = None
                new_state['action'] = old_state.action

    def __del__(self):
        # Clear the state dicts to break reference cycles.
        for state in self.states:
            state.clear()

    def new_state(self, action = None):
        """Create, register and return a fresh state dict."""
        number = self.next_number
        self.next_number = number + 1
        result = self.new_state_template.copy()
        result['number'] = number
        result['action'] = action
        self.states.append(result)
        return result

    def make_initial_state(self, name, state):
        self.initial_states[name] = state

    def add_transitions(self, state, event, new_state):
        """Enter |event| -> |new_state| into |state|. A tuple event is a
        half-open character-code range (code0, code1); the full range
        (-maxint, maxint) is recorded as the 'else' transition."""
        if type(event) == TupleType:
            code0, code1 = event
            if code0 == -maxint:
                state['else'] = new_state
            elif code1 <> maxint:
                # One dict entry per character in the range.
                while code0 < code1:
                    state[chr(code0)] = new_state
                    code0 = code0 + 1
        else:
            state[event] = new_state

    def get_initial_state(self, name):
        return self.initial_states[name]

    def dump(self, file):
        """Write a human-readable description of the machine to |file|."""
        file.write("Plex.FastMachine:\n")
        file.write(" Initial states:\n")
        for name, state in self.initial_states.items():
            file.write(" %s: %s\n" % (repr(name), state['number']))
        for state in self.states:
            self.dump_state(state, file)

    def dump_state(self, state, file):
        import string
        # Header
        file.write(" State %d:\n" % state['number'])
        # Transitions
        self.dump_transitions(state, file)
        # Action
        action = state['action']
        if action is not None:
            file.write(" %s\n" % action)

    def dump_transitions(self, state, file):
        # Group single-character transitions by target state so they
        # can be printed as compact ranges; keep the special events
        # ('bol', 'eol', 'eof', 'else') separate.
        chars_leading_to_state = {}
        special_to_state = {}
        for (c, s) in state.items():
            if len(c) == 1:
                chars = chars_leading_to_state.get(id(s), None)
                if chars is None:
                    chars = []
                    chars_leading_to_state[id(s)] = chars
                chars.append(c)
            elif len(c) <= 4:
                special_to_state[c] = s
        ranges_to_state = {}
        for state in self.states:
            char_list = chars_leading_to_state.get(id(state), None)
            if char_list:
                ranges = self.chars_to_ranges(char_list)
                ranges_to_state[ranges] = state
        # Python 2 idiom: keys() returns a list, sorted in place.
        ranges_list = ranges_to_state.keys()
        ranges_list.sort()
        for ranges in ranges_list:
            key = self.ranges_to_string(ranges)
            state = ranges_to_state[ranges]
            file.write(" %s --> State %d\n" % (key, state['number']))
        for key in ('bol', 'eol', 'eof', 'else'):
            state = special_to_state.get(key, None)
            if state:
                file.write(" %s --> State %d\n" % (key, state['number']))

    def chars_to_ranges(self, char_list):
        """Collapse a list of characters into a tuple of (first, last)
        inclusive character ranges."""
        char_list.sort()
        i = 0
        n = len(char_list)
        result = []
        while i < n:
            c1 = ord(char_list[i])
            c2 = c1
            i = i + 1
            # Extend the range while consecutive codes follow.
            while i < n and ord(char_list[i]) == c2 + 1:
                i = i + 1
                c2 = c2 + 1
            result.append((chr(c1), chr(c2)))
        return tuple(result)

    def ranges_to_string(self, range_list):
        return string.join(map(self.range_to_string, range_list), ",")

    def range_to_string(self, (c1, c2)):
        # Python 2 tuple parameter unpacking.
        if c1 == c2:
            return repr(c1)
        else:
            return "%s..%s" % (repr(c1), repr(c2))
+##
+## (Superseded by Machines.FastMachine)
+##
+## class StateTableMachine:
+## """
+## StateTableMachine is an alternative representation of a Machine
+## that can be run more efficiently.
+## """
+## initial_states = None # {state_name:state_index}
+## states = None # [([state] indexed by char code, Action)]
+
+## special_map = {'bol':256, 'eol':257, 'eof':258}
+
+## def __init__(self, m):
+## """
+## Initialise StateTableMachine from Machine |m|.
+## """
+## initial_states = self.initial_states = {}
+## states = self.states = [None]
+## old_to_new = {}
+## i = 1
+## for old_state in m.states:
+## new_state = ([0] * 259, old_state.get_action())
+## states.append(new_state)
+## old_to_new[old_state] = i # new_state
+## i = i + 1
+## for name, old_state in m.initial_states.items():
+## initial_states[name] = old_to_new[old_state]
+## for old_state in m.states:
+## new_state_index = old_to_new[old_state]
+## new_table = states[new_state_index][0]
+## transitions = old_state.transitions
+## for c, old_targets in transitions.items():
+## if old_targets:
+## old_target = old_targets[0]
+## new_target_index = old_to_new[old_target]
+## if len(c) == 1:
+## a = ord(c)
+## else:
+## a = self.special_map[c]
+## new_table[a] = states[new_target_index]
+
+## def dump(self, f):
+## f.write("Plex.StateTableMachine:\n")
+## f.write(" Initial states:\n")
+## for name, index in self.initial_states.items():
+## f.write(" %s: State %d\n" % (
+## repr(name), id(self.states[index])))
+## for i in xrange(1, len(self.states)):
+## table, action = self.states[i]
+## f.write(" State %d:" % i)
+## if action:
+## f.write("%s" % action)
+## f.write("\n")
+## f.write(" %s\n" % map(id,table))
+
+
+
+
+
+
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Regexps.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Regexps.py
new file mode 100755
index 00000000..6164d3bd
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Regexps.py
@@ -0,0 +1,557 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Regular Expressions
+#
+#=======================================================================
+
+import array
+import string
+import types
+from sys import maxint
+
+import Errors
+
+#
+# Constants
+#
+
+BOL = 'bol'
+EOL = 'eol'
+EOF = 'eof'
+
+nl_code = ord('\n')
+
+#
+# Helper functions
+#
+
def chars_to_ranges(s):
    """
    Return a list of character codes consisting of pairs
    [code1a, code1b, code2a, code2b,...] which cover all
    the characters in |s|.

    Each pair is a half-open range: code1a <= c < code1b.
    Duplicate characters in |s| are tolerated.
    """
    char_list = list(s)
    char_list.sort()
    i = 0
    n = len(char_list)
    result = []
    while i < n:
        code1 = ord(char_list[i])
        code2 = code1 + 1
        i = i + 1
        # Absorb subsequent characters that fall inside or immediately
        # after the current range.  Bug fix: the old loop unconditionally
        # did "code2 = code2 + 1" for every absorbed character, so a
        # duplicate (e.g. "aab") extended the range past the real
        # characters and matched codes never present in |s|.
        while i < n and ord(char_list[i]) <= code2:
            code2 = max(code2, ord(char_list[i]) + 1)
            i = i + 1
        result.append(code1)
        result.append(code2)
    return result
+
def uppercase_range(code1, code2):
    """
    If the range of characters from code1 to code2-1 includes any
    lower case letters, return the corresponding upper case range.
    Returns None when the range contains no lower case letters.
    """
    # Intersect [code1, code2) with the lower-case letters.
    lo = max(code1, ord('a'))
    hi = min(code2, ord('z') + 1)
    if lo >= hi:
        return None
    shift = ord('A') - ord('a')
    return (lo + shift, hi + shift)
+
def lowercase_range(code1, code2):
    """
    If the range of characters from code1 to code2-1 includes any
    upper case letters, return the corresponding lower case range.
    Returns None when the range contains no upper case letters.
    """
    # Intersect [code1, code2) with the upper-case letters.
    lo = max(code1, ord('A'))
    hi = min(code2, ord('Z') + 1)
    if lo >= hi:
        return None
    shift = ord('a') - ord('A')
    return (lo + shift, hi + shift)
+
def CodeRanges(code_list):
    """
    Given a list of codes as returned by chars_to_ranges, return
    an RE which will match a character in any of the ranges.
    """
    re_list = []
    for i in xrange(0, len(code_list), 2):
        re_list.append(CodeRange(code_list[i], code_list[i + 1]))
    # apply() has been deprecated since Python 2.3; * argument
    # unpacking is the equivalent modern form.
    return Alt(*re_list)
+
def CodeRange(code1, code2):
    """
    CodeRange(code1, code2) is an RE which matches any character
    with a code |c| in the range |code1| <= |c| < |code2|.
    """
    # Newline interacts with BOL/EOL handling, so a range containing
    # it is split around the dedicated RawNewline RE.
    if not (code1 <= nl_code < code2):
        return RawCodeRange(code1, code2)
    return Alt(RawCodeRange(code1, nl_code),
               RawNewline,
               RawCodeRange(nl_code + 1, code2))
+
+#
+# Abstract classes
+#
+
class RE:
    """RE is the base class for regular expression constructors.
    The following operators are defined on REs:

    re1 + re2 is an RE which matches |re1| followed by |re2|
    re1 | re2 is an RE which matches either |re1| or |re2|
    """

    nullable = 1  # True if this RE can match 0 input symbols
    match_nl = 1  # True if this RE can match a string ending with '\n'
    str = None    # Set to a string to override the class's __str__ result

    def build_machine(self, machine, initial_state, final_state,
                      match_bol, nocase):
        """
        This method should add states to |machine| to implement this
        RE, starting at |initial_state| and ending at |final_state|.
        If |match_bol| is true, the RE must be able to match at the
        beginning of a line. If nocase is true, upper and lower case
        letters should be treated as equivalent.
        """
        # Bug fix: this used to raise exceptions.UnimplementedMethod,
        # but the |exceptions| module was never imported and defines no
        # such class, so calling it raised NameError instead of a
        # meaningful error.  NotImplementedError is the standard way to
        # mark an abstract method.
        raise NotImplementedError("%s.build_machine not implemented" %
            self.__class__.__name__)

    def build_opt(self, m, initial_state, c):
        """
        Given a state |s| of machine |m|, return a new state
        reachable from |s| on character |c| or epsilon.
        """
        s = m.new_state()
        initial_state.link_to(s)
        initial_state.add_transition(c, s)
        return s

    def __add__(self, other):
        return Seq(self, other)

    def __or__(self, other):
        return Alt(self, other)

    def __str__(self):
        if self.str:
            return self.str
        else:
            return self.calc_str()

    # --- Constructor-argument validation helpers -----------------------

    def check_re(self, num, value):
        # |value| must itself be an RE.
        if not isinstance(value, RE):
            self.wrong_type(num, value, "Plex.RE instance")

    def check_string(self, num, value):
        # |value| must be a plain string.
        if type(value) != type(''):
            self.wrong_type(num, value, "string")

    def check_char(self, num, value):
        # |value| must be a string of exactly one character.
        self.check_string(num, value)
        if len(value) != 1:
            raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s."
                "Expected a string of length 1, got: %s" % (
                    num, self.__class__.__name__, repr(value)))

    def wrong_type(self, num, value, expected):
        # Report a badly-typed argument, naming the type actually received.
        if type(value) == types.InstanceType:
            got = "%s.%s instance" % (
                value.__class__.__module__, value.__class__.__name__)
        else:
            got = type(value).__name__
        # Bug fix: the message was missing its closing parenthesis.
        raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s "
            "(expected %s, got %s)" % (
                num, self.__class__.__name__, expected, got))
+
+#
+# Primitive RE constructors
+# -------------------------
+#
+# These are the basic REs from which all others are built.
+#
+
+## class Char(RE):
+## """
+## Char(c) is an RE which matches the character |c|.
+## """
+
+## nullable = 0
+
+## def __init__(self, char):
+## self.char = char
+## self.match_nl = char == '\n'
+
+## def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+## c = self.char
+## if match_bol and c <> BOL:
+## s1 = self.build_opt(m, initial_state, BOL)
+## else:
+## s1 = initial_state
+## if c == '\n' or c == EOF:
+## s1 = self.build_opt(m, s1, EOL)
+## if len(c) == 1:
+## code = ord(self.char)
+## s1.add_transition((code, code+1), final_state)
+## if nocase and is_letter_code(code):
+## code2 = other_case_code(code)
+## s1.add_transition((code2, code2+1), final_state)
+## else:
+## s1.add_transition(c, final_state)
+
+## def calc_str(self):
+## return "Char(%s)" % repr(self.char)
+
def Char(c):
    """
    Char(c) is an RE which matches the character |c|.
    """
    if len(c) != 1:
        # A multi-character name denotes one of the special input
        # symbols BOL, EOL or EOF.
        result = SpecialSymbol(c)
    else:
        code = ord(c)
        result = CodeRange(code, code + 1)
    result.str = "Char(%s)" % repr(c)
    return result
+
class RawCodeRange(RE):
    """
    RawCodeRange(code1, code2) is a low-level RE which matches any character
    with a code |c| in the range |code1| <= |c| < |code2|, where the range
    does not include newline. For internal use only.
    """
    nullable = 0
    match_nl = 0
    range = None            # (code1, code2)
    uppercase_range = None  # (code, code) or None: range shifted to upper case
    lowercase_range = None  # (code, code) or None: range shifted to lower case

    def __init__(self, code1, code2):
        self.range = (code1, code2)
        # Precompute the case-swapped ranges used when nocase is on.
        # (These call the module-level helper functions, which shadow
        # the identically-named class attributes.)
        self.uppercase_range = uppercase_range(code1, code2)
        self.lowercase_range = lowercase_range(code1, code2)

    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
        if match_bol:
            initial_state = self.build_opt(m, initial_state, BOL)
        initial_state.add_transition(self.range, final_state)
        if nocase:
            if self.uppercase_range:
                initial_state.add_transition(self.uppercase_range, final_state)
            if self.lowercase_range:
                initial_state.add_transition(self.lowercase_range, final_state)

    def calc_str(self):
        # Bug fix: this referenced self.code1/self.code2, which are never
        # set (the codes are stored in the self.range tuple), so printing
        # an unnamed RawCodeRange raised AttributeError.
        return "CodeRange(%d,%d)" % self.range
+
class _RawNewline(RE):
    """
    RawNewline is a low-level RE which matches a newline character.
    For internal use only.
    """
    nullable = 0
    match_nl = 1

    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
        source = initial_state
        if match_bol:
            source = self.build_opt(m, source, BOL)
        # A newline is preceded by an optional EOL symbol.
        eol_state = self.build_opt(m, source, EOL)
        eol_state.add_transition((nl_code, nl_code + 1), final_state)

# Shared singleton; CodeRange() splits ranges around it.
RawNewline = _RawNewline()
+
+
class SpecialSymbol(RE):
    """
    SpecialSymbol(sym) is an RE which matches the special input
    symbol |sym|, which is one of BOL, EOL or EOF.
    """
    nullable = 0
    match_nl = 0
    sym = None

    def __init__(self, sym):
        self.sym = sym

    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
        # The sequences 'bol bol' and 'bol eof' cannot occur, so a
        # beginning-of-line prefix is only needed when matching eol.
        start = initial_state
        if match_bol and self.sym == EOL:
            start = self.build_opt(m, start, BOL)
        start.add_transition(self.sym, final_state)
+
+
class Seq(RE):
    """Seq(re1, re2, re3...) is an RE which matches |re1| followed by
    |re2| followed by |re3|..."""

    def __init__(self, *re_list):
        # The sequence is nullable only if every component is nullable.
        nullable = 1
        for i in xrange(len(re_list)):
            re = re_list[i]
            self.check_re(i, re)
            nullable = nullable and re.nullable
        self.re_list = re_list
        self.nullable = nullable
        # The sequence can match a string ending in '\n' if some
        # component can, and every component after it is nullable.
        # Scan backwards from the end to find out.
        i = len(re_list)
        match_nl = 0
        while i:
            i = i - 1
            re = re_list[i]
            if re.match_nl:
                match_nl = 1
                break
            if not re.nullable:
                break
        self.match_nl = match_nl

    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
        re_list = self.re_list
        if len(re_list) == 0:
            # Empty sequence: matches the empty string (epsilon move).
            initial_state.link_to(final_state)
        else:
            # Chain the components through fresh intermediate states.
            s1 = initial_state
            n = len(re_list)
            for i in xrange(n):
                if i < n - 1:
                    s2 = m.new_state()
                else:
                    s2 = final_state
                re = re_list[i]
                re.build_machine(m, s1, s2, match_bol, nocase)
                s1 = s2
                # The next component must be able to match at beginning
                # of line if this one could end with a newline, or if we
                # were already at BOL and this one is nullable.
                match_bol = re.match_nl or (match_bol and re.nullable)

    def calc_str(self):
        return "Seq(%s)" % string.join(map(str, self.re_list), ",")
+
+
class Alt(RE):
    """Alt(re1, re2, re3...) is an RE which matches either |re1| or
    |re2| or |re3|..."""

    def __init__(self, *re_list):
        self.re_list = re_list
        nullable = 0
        match_nl = 0
        # Partition the alternatives: nullable ones are wired directly
        # from the initial state, non-nullable ones can share a single
        # optional-BOL prefix state (see build_machine).
        nullable_res = []
        non_nullable_res = []
        i = 1
        for re in re_list:
            self.check_re(i, re)
            if re.nullable:
                nullable_res.append(re)
                nullable = 1
            else:
                non_nullable_res.append(re)
            if re.match_nl:
                match_nl = 1
            i = i + 1
        self.nullable_res = nullable_res
        self.non_nullable_res = non_nullable_res
        self.nullable = nullable
        self.match_nl = match_nl

    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
        for re in self.nullable_res:
            re.build_machine(m, initial_state, final_state, match_bol, nocase)
        if self.non_nullable_res:
            if match_bol:
                # One shared BOL-skipping state for all non-nullable
                # alternatives, instead of one per alternative.
                initial_state = self.build_opt(m, initial_state, BOL)
            for re in self.non_nullable_res:
                re.build_machine(m, initial_state, final_state, 0, nocase)

    def calc_str(self):
        return "Alt(%s)" % string.join(map(str, self.re_list), ",")
+
+
class Rep1(RE):
    """Rep1(re) is an RE which matches one or more repetitions of |re|."""

    def __init__(self, re):
        self.check_re(1, re)
        self.re = re
        self.nullable = re.nullable
        self.match_nl = re.match_nl

    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
        # Loop shape:
        #   initial --eps--> entry --re--> exit --eps--> entry  (repeat)
        #                                  exit --eps--> final
        loop_entry = m.new_state()
        loop_exit = m.new_state()
        initial_state.link_to(loop_entry)
        self.re.build_machine(m, loop_entry, loop_exit,
                              match_bol or self.re.match_nl, nocase)
        loop_exit.link_to(loop_entry)
        loop_exit.link_to(final_state)

    def calc_str(self):
        return "Rep1(%s)" % self.re
+
+
class SwitchCase(RE):
    """
    SwitchCase(re, nocase) is an RE which matches the same strings as RE,
    but treating upper and lower case letters according to |nocase|. If
    |nocase| is true, case is ignored, otherwise it is not.
    """
    re = None
    nocase = None

    def __init__(self, re, nocase):
        self.re = re
        self.nocase = nocase
        self.nullable = re.nullable
        self.match_nl = re.match_nl

    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
        # Discard the inherited |nocase| flag and impose our own
        # setting on the wrapped RE.
        self.re.build_machine(m, initial_state, final_state, match_bol,
                              self.nocase)

    def calc_str(self):
        if self.nocase:
            return "NoCase(%s)" % self.re
        return "Case(%s)" % self.re
+
+#
+# Composite RE constructors
+# -------------------------
+#
+# These REs are defined in terms of the primitive REs.
+#
+
# A zero-element sequence matches exactly the empty string.
Empty = Seq()
Empty.__doc__ = \
    """
    Empty is an RE which matches the empty string.
    """
Empty.str = "Empty"
+
def Str1(s):
    """
    Str1(s) is an RE which matches the literal string |s|.
    """
    # One Char per character, chained into a sequence.  apply() has been
    # deprecated since Python 2.3; * unpacking is the modern equivalent.
    result = Seq(*map(Char, s))
    result.str = "Str(%s)" % repr(s)
    return result
+
def Str(*strs):
    """
    Str(s) is an RE which matches the literal string |s|.
    Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|...
    """
    if len(strs) == 1:
        return Str1(strs[0])
    else:
        # apply() is deprecated; use * argument unpacking instead.
        result = Alt(*map(Str1, strs))
        result.str = "Str(%s)" % string.join(map(repr, strs), ",")
        return result
+
def Any(s):
    """
    Any(s) is an RE which matches any character in the string |s|.
    """
    # Represent the character set compactly as code ranges rather than
    # as an alternation of individual characters.
    ranges = chars_to_ranges(s)
    result = CodeRanges(ranges)
    result.str = "Any(%s)" % repr(s)
    return result
+
def AnyBut(s):
    """
    AnyBut(s) is an RE which matches any character (including
    newline) which is not in the string |s|.
    """
    # Complement the ranges of |s| by bracketing them with sentinel
    # codes at the extremes of the code space.
    ranges = chars_to_ranges(s)
    result = CodeRanges([-maxint] + ranges + [maxint])
    result.str = "AnyBut(%s)" % repr(s)
    return result
+
# "Any character but those in the empty string" covers the whole code
# space, newline included.
AnyChar = AnyBut("")
AnyChar.__doc__ = \
    """
    AnyChar is an RE which matches any single character (including a newline).
    """
AnyChar.str = "AnyChar"
+
def Range(s1, s2 = None):
    """
    Range(c1, c2) is an RE which matches any single character in the range
    |c1| to |c2| inclusive.
    Range(s) where |s| is a string of even length is an RE which matches
    any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,...
    """
    if s2:
        result = CodeRange(ord(s1), ord(s2) + 1)
        result.str = "Range(%s,%s)" % (s1, s2)
    else:
        # Each adjacent pair of characters in |s1| bounds one inclusive
        # range.
        ranges = []
        for i in range(0, len(s1), 2):
            ranges.append(CodeRange(ord(s1[i]), ord(s1[i+1]) + 1))
        # apply() is deprecated; use * argument unpacking instead.
        result = Alt(*ranges)
        result.str = "Range(%s)" % repr(s1)
    return result
+
def Opt(re):
    """
    Opt(re) is an RE which matches either |re| or the empty string.
    """
    opt_re = Alt(re, Empty)
    opt_re.str = "Opt(%s)" % re
    return opt_re
+
def Rep(re):
    """
    Rep(re) is an RE which matches zero or more repetitions of |re|.
    """
    # Zero-or-more is one-or-more made optional.
    rep_re = Opt(Rep1(re))
    rep_re.str = "Rep(%s)" % re
    return rep_re
+
def NoCase(re):
    """
    NoCase(re) is an RE which matches the same strings as RE, but treating
    upper and lower case letters as equivalent.
    """
    return SwitchCase(re, nocase = 1)
+
def Case(re):
    """
    Case(re) is an RE which matches the same strings as RE, but treating
    upper and lower case letters as distinct, i.e. it cancels the effect
    of any enclosing NoCase().
    """
    return SwitchCase(re, nocase = 0)
+
+#
+# RE Constants
+#
+
# Predefined REs for the special positional symbols.  Char() returns a
# SpecialSymbol instance for these multi-character names.
Bol = Char(BOL)
Bol.__doc__ = \
    """
    Bol is an RE which matches the beginning of a line.
    """
Bol.str = "Bol"

Eol = Char(EOL)
Eol.__doc__ = \
    """
    Eol is an RE which matches the end of a line.
    """
Eol.str = "Eol"

Eof = Char(EOF)
Eof.__doc__ = \
    """
    Eof is an RE which matches the end of the file.
    """
Eof.str = "Eof"
+
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Scanners.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Scanners.py
new file mode 100755
index 00000000..6278d88b
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Scanners.py
@@ -0,0 +1,377 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+#
+# Scanning an input stream
+#
+#=======================================================================
+
+import Errors
+from Regexps import BOL, EOL, EOF
+
class Scanner:
    """
    A Scanner is used to read tokens from a stream of characters
    using the token set specified by a Plex.Lexicon.

    Constructor:

      Scanner(lexicon, stream, name = '')

      See the docstring of the __init__ method for details.

    Methods:

      See the docstrings of the individual methods for more
      information.

      read() --> (value, text)
        Reads the next lexical token from the stream.

      position() --> (name, line, col)
        Returns the position of the last token read using the
        read() method.

      begin(state_name)
        Causes scanner to change state.

      produce(value [, text])
        Causes return of a token value to the caller of the
        Scanner.

    """

    # Input-state encoding used by next_char()/run_machine_inlined():
    #   1 = reading ordinary characters
    #   2 = a '\n' was read; EOL has been reported, '\n' itself is next
    #   3 = '\n' has been reported; BOL of the next line is next
    #   4 = input exhausted; EOF symbol is next
    #   5 = EOF reported; only '' remains
    lexicon = None          # Lexicon
    stream = None           # file-like object
    name = ''
    buffer = ''
    buf_start_pos = 0       # position in input of start of buffer
    next_pos = 0            # position in input of next char to read
    cur_pos = 0             # position in input of current char
    cur_line = 1            # line number of current char
    cur_line_start = 0      # position in input of start of current line
    start_pos = 0           # position in input of start of token
    start_line = 0          # line number of start of token
    start_col = 0           # position in line of start of token
    text = None             # text of last token read
    initial_state = None    # Node
    state_name = ''         # Name of initial state
    queue = None            # list of tokens to be returned
    trace = 0

    def __init__(self, lexicon, stream, name = ''):
        """
        Scanner(lexicon, stream, name = '')

        |lexicon| is a Plex.Lexicon instance specifying the lexical tokens
        to be recognised.

        |stream| can be a file object or anything which implements a
        compatible read() method.

        |name| is optional, and may be the name of the file being
        scanned or any other identifying string.
        """
        self.lexicon = lexicon
        self.stream = stream
        self.name = name
        self.queue = []
        self.initial_state = None
        self.begin('')
        self.next_pos = 0
        self.cur_pos = 0
        self.cur_line_start = 0
        self.cur_char = BOL     # scanning starts at beginning of line
        self.input_state = 1

    def read(self):
        """
        Read the next lexical token from the stream and return a
        tuple (value, text), where |value| is the value associated with
        the token as specified by the Lexicon, and |text| is the actual
        string read from the stream. Returns (None, '') on end of file.
        """
        queue = self.queue
        # Keep scanning until an action produces at least one token;
        # actions returning None yield nothing and scanning continues.
        while not queue:
            self.text, action = self.scan_a_token()
            if action is None:
                self.produce(None)
                self.eof()
            else:
                value = action.perform(self, self.text)
                if value is not None:
                    self.produce(value)
        result = queue[0]
        del queue[0]
        return result

    def scan_a_token(self):
        """
        Read the next input sequence recognised by the machine
        and return (text, action). Returns ('', None) on end of
        file.
        """
        self.start_pos = self.cur_pos
        self.start_line = self.cur_line
        self.start_col = self.cur_pos - self.cur_line_start
#       if self.trace:
#           action = self.run_machine()
#       else:
#           action = self.run_machine_inlined()
        action = self.run_machine_inlined()
        if action:
            if self.trace:
                print "Scanner: read: Performing", action, "%d:%d" % (
                    self.start_pos, self.cur_pos)
            # Slice the matched text out of the input buffer.
            base = self.buf_start_pos
            text = self.buffer[self.start_pos - base : self.cur_pos - base]
            return (text, action)
        else:
            # No progress was made: either we are at end of file, or the
            # input does not match any token.
            if self.cur_pos == self.start_pos:
                if self.cur_char == EOL:
                    self.next_char()
                if not self.cur_char or self.cur_char == EOF:
                    return ('', None)
            raise Errors.UnrecognizedInput(self, self.state_name)

    def run_machine(self):
        """
        Run the machine until no more transitions are possible.

        NOTE(review): this non-inlined variant depends on transition()
        and back_up(), which are commented out below, so it cannot run
        as-is; scan_a_token() always uses run_machine_inlined().
        """
        self.state = self.initial_state
        self.backup_state = None
        while self.transition():
            pass
        return self.back_up()

    def run_machine_inlined(self):
        """
        Inlined version of run_machine for speed.

        Works on local copies of the scanner position state and writes
        them back once on exit.  Remembers the last state that had an
        action (backup_state) so the longest match can be backed up to
        when the machine blocks.
        """
        state = self.initial_state
        cur_pos = self.cur_pos
        cur_line = self.cur_line
        cur_line_start = self.cur_line_start
        cur_char = self.cur_char
        input_state = self.input_state
        next_pos = self.next_pos
        buffer = self.buffer
        buf_start_pos = self.buf_start_pos
        buf_len = len(buffer)
        backup_state = None
        trace = self.trace
        while 1:
            if trace: #TRACE#
                print "State %d, %d/%d:%s -->" % ( #TRACE#
                    state['number'], input_state, cur_pos, repr(cur_char)), #TRACE#
            # Begin inlined self.save_for_backup()
            #action = state.action #@slow
            action = state['action'] #@fast
            if action:
                backup_state = (
                    action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos)
            # End inlined self.save_for_backup()
            c = cur_char
            #new_state = state.new_state(c) #@slow
            new_state = state.get(c, -1) #@fast
            if new_state == -1: #@fast
                # No explicit transition: try the 'else' transition,
                # unless we are past end of input (c == '').
                new_state = c and state.get('else') #@fast
            if new_state:
                if trace: #TRACE#
                    print "State %d" % new_state['number'] #TRACE#
                state = new_state
                # Begin inlined: self.next_char()
                if input_state == 1:
                    cur_pos = next_pos
                    # Begin inlined: c = self.read_char()
                    buf_index = next_pos - buf_start_pos
                    if buf_index < buf_len:
                        c = buffer[buf_index]
                        next_pos = next_pos + 1
                    else:
                        # Buffer exhausted: discard everything before the
                        # current token and refill from the stream.
                        discard = self.start_pos - buf_start_pos
                        data = self.stream.read(0x1000)
                        buffer = self.buffer[discard:] + data
                        self.buffer = buffer
                        buf_start_pos = buf_start_pos + discard
                        self.buf_start_pos = buf_start_pos
                        buf_len = len(buffer)
                        buf_index = buf_index - discard
                        if data:
                            c = buffer[buf_index]
                            next_pos = next_pos + 1
                        else:
                            c = ''
                    # End inlined: c = self.read_char()
                    if c == '\n':
                        cur_char = EOL
                        input_state = 2
                    elif not c:
                        cur_char = EOL
                        input_state = 4
                    else:
                        cur_char = c
                elif input_state == 2:
                    cur_char = '\n'
                    input_state = 3
                elif input_state == 3:
                    cur_line = cur_line + 1
                    cur_line_start = cur_pos = next_pos
                    cur_char = BOL
                    input_state = 1
                elif input_state == 4:
                    cur_char = EOF
                    input_state = 5
                else: # input_state = 5
                    cur_char = ''
                # End inlined self.next_char()
            else: # not new_state
                if trace: #TRACE#
                    print "blocked" #TRACE#
                # Begin inlined: action = self.back_up()
                if backup_state:
                    (action, cur_pos, cur_line, cur_line_start,
                        cur_char, input_state, next_pos) = backup_state
                else:
                    action = None
                break # while 1
                # End inlined: action = self.back_up()
        self.cur_pos = cur_pos
        self.cur_line = cur_line
        self.cur_line_start = cur_line_start
        self.cur_char = cur_char
        self.input_state = input_state
        self.next_pos = next_pos
        if trace: #TRACE#
            if action: #TRACE#
                print "Doing", action #TRACE#
        return action

#   def transition(self):
#       self.save_for_backup()
#       c = self.cur_char
#       new_state = self.state.new_state(c)
#       if new_state:
#           if self.trace:
#               print "Scanner: read: State %d: %s --> State %d" % (
#                   self.state.number, repr(c), new_state.number)
#           self.state = new_state
#           self.next_char()
#           return 1
#       else:
#           if self.trace:
#               print "Scanner: read: State %d: %s --> blocked" % (
#                   self.state.number, repr(c))
#           return 0

#   def save_for_backup(self):
#       action = self.state.get_action()
#       if action:
#           if self.trace:
#               print "Scanner: read: Saving backup point at", self.cur_pos
#           self.backup_state = (
#               action, self.cur_pos, self.cur_line, self.cur_line_start,
#               self.cur_char, self.input_state, self.next_pos)

#   def back_up(self):
#       backup_state = self.backup_state
#       if backup_state:
#           (action, self.cur_pos, self.cur_line, self.cur_line_start,
#               self.cur_char, self.input_state, self.next_pos) = backup_state
#           if self.trace:
#               print "Scanner: read: Backing up to", self.cur_pos
#           return action
#       else:
#           return None

    def next_char(self):
        """
        Advance to the next input symbol, stepping through the
        input-state sequence documented on the class (so that EOL, the
        '\n' character itself, BOL and EOF are each reported in turn).
        """
        input_state = self.input_state
        if self.trace:
            print "Scanner: next:", " "*20, "[%d] %d" % (input_state, self.cur_pos),
        if input_state == 1:
            self.cur_pos = self.next_pos
            c = self.read_char()
            if c == '\n':
                self.cur_char = EOL
                self.input_state = 2
            elif not c:
                self.cur_char = EOL
                self.input_state = 4
            else:
                self.cur_char = c
        elif input_state == 2:
            self.cur_char = '\n'
            self.input_state = 3
        elif input_state == 3:
            self.cur_line = self.cur_line + 1
            self.cur_line_start = self.cur_pos = self.next_pos
            self.cur_char = BOL
            self.input_state = 1
        elif input_state == 4:
            self.cur_char = EOF
            self.input_state = 5
        else: # input_state = 5
            self.cur_char = ''
        if self.trace:
            print "--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char))

#   def read_char(self):
#       """
#       Get the next input character, filling the buffer if necessary.
#       Returns '' at end of file.
#       """
#       next_pos = self.next_pos
#       buf_index = next_pos - self.buf_start_pos
#       if buf_index == len(self.buffer):
#           discard = self.start_pos - self.buf_start_pos
#           data = self.stream.read(0x1000)
#           self.buffer = self.buffer[discard:] + data
#           self.buf_start_pos = self.buf_start_pos + discard
#           buf_index = buf_index - discard
#           if not data:
#               return ''
#       c = self.buffer[buf_index]
#       self.next_pos = next_pos + 1
#       return c

    def position(self):
        """
        Return a tuple (name, line, col) representing the location of
        the last token read using the read() method. |name| is the
        name that was provided to the Scanner constructor; |line|
        is the line number in the stream (1-based); |col| is the
        position within the line of the first character of the token
        (0-based).
        """
        return (self.name, self.start_line, self.start_col)

    def begin(self, state_name):
        """Set the current state of the scanner to the named state."""
        self.initial_state = (
            self.lexicon.get_initial_state(state_name))
        self.state_name = state_name

    def produce(self, value, text = None):
        """
        Called from an action procedure, causes |value| to be returned
        as the token value from read(). If |text| is supplied, it is
        returned in place of the scanned text.

        produce() can be called more than once during a single call to an action
        procedure, in which case the tokens are queued up and returned one
        at a time by subsequent calls to read(), until the queue is empty,
        whereupon scanning resumes.
        """
        if text is None:
            text = self.text
        self.queue.append((value, text))

    def eof(self):
        """
        Override this method if you want something to be done at
        end of file.
        """

# For backward compatibility: 'yield' is a reserved word, so the alias
# must be installed with setattr rather than a def statement.
setattr(Scanner, "yield", Scanner.produce)
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Timing.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Timing.py
new file mode 100755
index 00000000..f47c5c89
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Timing.py
@@ -0,0 +1,22 @@
+#
+# Get time in platform-dependent way
+#
+
+import os
+from sys import platform, exit, stderr
+
# Pick a time() implementation for the current platform.  |timekind|
# records whether it measures real (wall-clock) or CPU time.
if platform == 'mac':
    # Classic MacOS has no os.times(); use the 60 Hz system tick count.
    import MacOS
    def time():
        return MacOS.GetTicks() / 60.0
    timekind = "real"
elif hasattr(os, 'times'):
    def time():
        # Sum of user and system CPU time for this process.
        t = os.times()
        return t[0] + t[1]
    timekind = "cpu"
else:
    stderr.write(
        "Don't know how to get time on platform %s\n" % repr(platform))
    exit(1)
+
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py
new file mode 100755
index 00000000..b3148c1e
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py
@@ -0,0 +1,154 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Traditional Regular Expression Syntax
+#
+#=======================================================================
+
+from Regexps import *
+from Errors import PlexError
+
class RegexpSyntaxError(PlexError):
    # Raised by REParser.error() when the traditional-syntax regexp
    # string is malformed.
    pass
+
def re(s):
    """
    Convert traditional string representation of regular expression |s|
    into Plex representation.
    """
    # NOTE: this function shadows the stdlib |re| module name within
    # this module.
    parser = REParser(s)
    return parser.parse_re()
+
class REParser:
    # Recursive-descent parser for traditional regexp syntax.
    # Parser state:
    #   s   -- the regexp string being parsed
    #   i   -- index of the current character (-1 before the first next())
    #   c   -- current character, or '' once the string is exhausted
    #   end -- true when the whole string has been consumed

    def __init__(self, s):
        self.s = s
        self.i = -1
        self.end = 0
        self.next()

    def parse_re(self):
        """Parse the whole string and return the resulting RE."""
        re = self.parse_alt()
        if not self.end:
            self.error("Unexpected %s" % repr(self.c))
        return re

    def parse_alt(self):
        """Parse a set of alternative regexps."""
        re = self.parse_seq()
        if self.c == '|':
            re_list = [re]
            while self.c == '|':
                self.next()
                re_list.append(self.parse_seq())
            re = apply(Alt, tuple(re_list))
        return re

    def parse_seq(self):
        """Parse a sequence of regexps."""
        re_list = []
        # '|' and ')' terminate a sequence.
        while not self.end and not self.c in "|)":
            re_list.append(self.parse_mod())
        return apply(Seq, tuple(re_list))

    def parse_mod(self):
        """Parse a primitive regexp followed by *, +, ? modifiers."""
        re = self.parse_prim()
        while not self.end and self.c in "*+?":
            if self.c == '*':
                re = Rep(re)
            elif self.c == '+':
                re = Rep1(re)
            else: # self.c == '?'
                re = Opt(re)
            self.next()
        return re

    def parse_prim(self):
        """Parse a primitive regexp."""
        c = self.get()
        if c == '.':
            re = AnyBut("\n")
        elif c == '^':
            re = Bol
        elif c == '$':
            re = Eol
        elif c == '(':
            re = self.parse_alt()
            self.expect(')')
        elif c == '[':
            re = self.parse_charset()
            self.expect(']')
        else:
            # '\\' escapes the following character; anything else is a
            # literal character.
            if c == '\\':
                c = self.get()
            re = Char(c)
        return re

    def parse_charset(self):
        """Parse a charset. Does not include the surrounding []."""
        char_list = []
        invert = 0
        if self.c == '^':
            invert = 1
            self.next()
        if self.c == ']':
            # A ']' immediately after the (possibly inverted) opening
            # bracket is a literal ']'.
            char_list.append(']')
            self.next()
        while not self.end and self.c <> ']':
            c1 = self.get()
            # 'a-z' is a range unless the '-' is the last charset char.
            if self.c == '-' and self.lookahead(1) <> ']':
                self.next()
                c2 = self.get()
                for a in xrange(ord(c1), ord(c2) + 1):
                    char_list.append(chr(a))
            else:
                char_list.append(c1)
        # NOTE(review): |string| is only in scope here via
        # "from Regexps import *" re-exporting its import -- confirm.
        chars = string.join(char_list, "")
        if invert:
            return AnyBut(chars)
        else:
            return Any(chars)

    def next(self):
        """Advance to the next char."""
        s = self.s
        i = self.i = self.i + 1
        if i < len(s):
            self.c = s[i]
        else:
            self.c = ''
            self.end = 1

    def get(self):
        """Return the current char and advance past it."""
        if self.end:
            self.error("Premature end of string")
        c = self.c
        self.next()
        return c

    def lookahead(self, n):
        """Look ahead n chars."""
        j = self.i + n
        if j < len(self.s):
            return self.s[j]
        else:
            return ''

    def expect(self, c):
        """
        Expect to find character |c| at current position.
        Raises an exception otherwise.
        """
        if self.c == c:
            self.next()
        else:
            self.error("Missing %s" % repr(c))

    def error(self, mess):
        """Raise exception to signal syntax error in regexp."""
        raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
            repr(self.s), self.i, mess))
+
+
+
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Transitions.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Transitions.py
new file mode 100755
index 00000000..c1edd5ef
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Transitions.py
@@ -0,0 +1,253 @@
+#
+# Plex - Transition Maps
+#
+# This version represents state sets directly as dicts
+# for speed.
+#
+
+from copy import copy
+import string
+from sys import maxint
+from types import TupleType
+
class TransitionMap:
    """
    A TransitionMap maps an input event to a set of states.
    An input event is one of: a range of character codes,
    the empty string (representing an epsilon move), or one
    of the special symbols BOL, EOL, EOF.

    For characters, this implementation compactly represents
    the map by means of a list:

      [code_0, states_0, code_1, states_1, code_2, states_2,
        ..., code_n-1, states_n-1, code_n]

    where |code_i| is a character code, and |states_i| is a
    set of states corresponding to characters with codes |c|
    in the range |code_i| <= |c| <= |code_i+1|.

    The following invariants hold:
      n >= 1
      code_0 == -maxint
      code_n == maxint
      code_i < code_i+1 for i in 0..n-1
      states_0 == states_n-1

    Mappings for the special events '', BOL, EOL, EOF are
    kept separately in a dictionary.

    State sets are plain dicts used as sets: the keys are the
    states and the values are always 1 (see add/add_set).
    """

    map = None      # The list of alternating codes and state-set dicts
    special = None  # Dict mapping special events ('', BOL, EOL, EOF) to state sets

    def __init__(self, map = None, special = None):
        # The default map covers the entire code range with one empty
        # state set: [-maxint, {}, maxint].
        if not map:
            map = [-maxint, {}, maxint]
        if not special:
            special = {}
        self.map = map
        self.special = special
        #self.check() ###

    def add(self, event, new_state,
            TupleType = TupleType):
        """
        Add transition to |new_state| on |event|.
        """
        # NOTE: TupleType is bound as a default argument so the lookup is
        # a fast local access in this hot path (classic CPython 2 idiom).
        if type(event) == TupleType:
            # Character range (code0, code1): split the map at both ends,
            # then add the state to every state set strictly in between.
            code0, code1 = event
            i = self.split(code0)
            j = self.split(code1)
            map = self.map
            while i < j:
                map[i + 1][new_state] = 1
                i = i + 2
        else:
            # Special event ('', BOL, EOL, EOF): stored in the side dict.
            self.get_special(event)[new_state] = 1

    def add_set(self, event, new_set,
                TupleType = TupleType):
        """
        Add transitions to the states in |new_set| on |event|.
        """
        # Same structure as add(), but merges a whole state set at once
        # via dict.update().
        if type(event) == TupleType:
            code0, code1 = event
            i = self.split(code0)
            j = self.split(code1)
            map = self.map
            while i < j:
                map[i + 1].update(new_set)
                i = i + 2
        else:
            self.get_special(event).update(new_set)

    def get_epsilon(self,
                    none = None):
        """
        Return the mapping for epsilon, or None.
        """
        # Epsilon moves are keyed by the empty string in |special|.
        return self.special.get('', none)

    def items(self,
              len = len):
        """
        Return the mapping as a list of ((code1, code2), state_set) and
        (special_event, state_set) pairs.
        """
        result = []
        map = self.map
        else_set = map[1]
        i = 0
        n = len(map) - 1
        code0 = map[0]
        while i < n:
            set = map[i + 1]
            code1 = map[i + 2]
            # NOTE(review): a range is emitted when its own set is non-empty
            # OR when the first state set (else_set) is non-empty --
            # presumably so callers see explicit empty gaps whenever the
            # default set carries states; confirm against DFA construction.
            if set or else_set:
                result.append(((code0, code1), set))
            code0 = code1
            i = i + 2
        for event, set in self.special.items():
            if set:
                result.append((event, set))
        return result

    # ------------------- Private methods --------------------

    def split(self, code,
              len = len, maxint = maxint):
        """
        Search the list for the position of the split point for |code|,
        inserting a new split point if necessary. Returns index |i| such
        that |code| == |map[i]|.
        """
        # We use a funky variation on binary search.
        # Codes live at even indices, state sets at odd ones, so the
        # midpoint is truncated to an even index to stay on a code.
        map = self.map
        hi = len(map) - 1
        # Special case: code == map[-1]
        if code == maxint:
            return hi
        # General case
        lo = 0
        # loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2
        while hi - lo >= 4:
            # Find midpoint truncated to even index
            mid = ((lo + hi) / 2) & ~1
            if code < map[mid]:
                hi = mid
            else:
                lo = mid
        # map[lo] <= code < map[hi] and hi - lo == 2
        if map[lo] == code:
            return lo
        else:
            # Insert a new split point before |hi|, copying the preceding
            # state set so both halves start out mapping identically.
            map[hi:hi] = [code, map[hi - 1].copy()]
            #self.check() ###
            return hi

    def get_special(self, event):
        """
        Get state set for special event, adding a new entry if necessary.
        """
        special = self.special
        set = special.get(event, None)
        if not set:
            set = {}
            special[event] = set
        return set

    # --------------------- Conversion methods -----------------------

    def __str__(self):
        # Render as "[code,set,code,set,...,code]+{special: set, ...}",
        # using "-inf"/"inf" for the sentinel codes.
        map_strs = []
        map = self.map
        n = len(map)
        i = 0
        while i < n:
            code = map[i]
            if code == -maxint:
                code_str = "-inf"
            elif code == maxint:
                code_str = "inf"
            else:
                code_str = str(code)
            map_strs.append(code_str)
            i = i + 1
            if i < n:
                map_strs.append(state_set_str(map[i]))
                i = i + 1
        special_strs = {}
        for event, set in self.special.items():
            special_strs[event] = state_set_str(set)
        return "[%s]+%s" % (
            string.join(map_strs, ","),
            special_strs
            )

    # --------------------- Debugging methods -----------------------

    def check(self):
        """Check data structure integrity."""
        # The last two codes must be strictly increasing (invariant
        # code_i < code_i+1); dump and abort otherwise.
        if not self.map[-3] < self.map[-1]:
            print self
            assert 0

    def dump(self, file):
        # Write a human-readable listing of all transitions to |file|.
        map = self.map
        i = 0
        n = len(map) - 1
        while i < n:
            self.dump_range(map[i], map[i + 2], map[i + 1], file)
            i = i + 2
        for event, set in self.special.items():
            if set:
                if not event:
                    event = 'empty'
                self.dump_trans(event, set, file)

    def dump_range(self, code0, code1, set, file):
        # Format one character-code range [code0, code1) and its state set.
        # Empty sets are skipped entirely.
        if set:
            if code0 == -maxint:
                if code1 == maxint:
                    k = "any"
                else:
                    k = "< %s" % self.dump_char(code1)
            elif code1 == maxint:
                k = "> %s" % self.dump_char(code0 - 1)
            elif code0 == code1 - 1:
                # Single-character range.
                k = self.dump_char(code0)
            else:
                k = "%s..%s" % (self.dump_char(code0),
                    self.dump_char(code1 - 1))
            self.dump_trans(k, set, file)

    def dump_char(self, code):
        # Printable form of a character code; codes outside the byte
        # range are shown as chr(n).
        if 0 <= code <= 255:
            return repr(chr(code))
        else:
            return "chr(%d)" % code

    def dump_trans(self, key, set, file):
        # One line of dump output: "  <key> --> [S1,S2,...]".
        file.write("  %s --> %s\n" % (key, self.dump_set(set)))

    def dump_set(self, set):
        return state_set_str(set)
+
+#
+# State set manipulation functions
+#
+
+#def merge_state_sets(set1, set2):
+# for state in set2.keys():
+# set1[state] = 1
+
def state_set_str(set):
    """
    Return a printable representation of a state set, e.g. "[S1,S2]".

    |set| is a dict used as a set whose keys are state objects; each
    state is expected to have a .number attribute.
    """
    # Fix: use the str.join() method instead of the deprecated
    # string.join() module function (removed in Python 3); also works
    # in Python 2. Behavior is unchanged.
    str_list = ["S%d" % state.number for state in set.keys()]
    return "[%s]" % ",".join(str_list)
+
+
+
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/__init__.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/__init__.py
new file mode 100755
index 00000000..22b9bba3
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/__init__.py
@@ -0,0 +1,40 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+#=======================================================================
+
+"""
+The Plex module provides lexical analysers with similar capabilities
+to GNU Flex. The following classes and functions are exported;
+see the attached docstrings for more information.
+
+ Scanner For scanning a character stream under the
+ direction of a Lexicon.
+
+ Lexicon For constructing a lexical definition
+ to be used by a Scanner.
+
+ Str, Any, AnyBut, AnyChar, Seq, Alt, Opt, Rep, Rep1,
+ Bol, Eol, Eof, Empty
+
+ Regular expression constructors, for building pattern
+ definitions for a Lexicon.
+
+ State For defining scanner states when creating a
+ Lexicon.
+
+ TEXT, IGNORE, Begin
+
+ Actions for associating with patterns when
+ creating a Lexicon.
+"""
+
+from Actions import TEXT, IGNORE, Begin
+from Lexicons import Lexicon, State
+from Regexps import RE, Seq, Alt, Rep1, Empty, Str, Any, AnyBut, AnyChar, Range
+from Regexps import Opt, Rep, Bol, Eol, Eof, Case, NoCase
+from Scanners import Scanner
+
+
+
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/test_tm.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/test_tm.py
new file mode 100755
index 00000000..e08c9e68
--- /dev/null
+++ b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/test_tm.py
@@ -0,0 +1,24 @@
# Ad-hoc smoke test for the transition-map implementation: adds a few
# transitions one at a time and prints the map after each step.
#
# NOTE(review): this script imports "TransitionMaps" and calls
# m.add_transition() / m.keys(), but the sibling module in this package
# is named Transitions and its TransitionMap class exposes add() and
# items() instead -- the script looks stale; confirm before relying on it.
import sys
# Merge stderr into stdout so tracebacks interleave with the printed maps.
sys.stderr = sys.stdout

from TransitionMaps import TransitionMap

m = TransitionMap()
print m

def add(c, s):
    # Add one transition and dump the resulting map state.
    print
    print "adding", repr(c), "-->", repr(s)
    m.add_transition(c, s)
    print m
    print "keys:", m.keys()

add('a','alpha')
add('e', 'eta')
add('f', 'foo')
add('i', 'iota')
add('i', 'imp')
add('eol', 'elephant')
+
+