From 5836bf07e0118910730bec3a8ad013b89406d934 Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Wed, 23 Sep 2015 16:50:17 +0200
Subject: [PATCH] Remove one local copy of the Python antlr3 module

* Remove the directory thirdparty/antlr3/
* Modify the antlr3 symbolic link to point to
  thirdparty/antlr3-antlr-3.5/runtime/Python/antlr3/

Change-Id: I8104b7352e96d8e282da4e5bd8ff4fb4817aaa32
---
 antlr3                           |    2 +-
 thirdparty/antlr3/__init__.py    |  159 --
 thirdparty/antlr3/compat.py      |   48 -
 thirdparty/antlr3/constants.py   |   57 -
 thirdparty/antlr3/debug.py       | 1137 ------
 thirdparty/antlr3/dfa.py         |  213 ---
 thirdparty/antlr3/dottreegen.py  |  210 ---
 thirdparty/antlr3/exceptions.py  |  364 ----
 thirdparty/antlr3/extras.py      |   47 -
 thirdparty/antlr3/main.py        |  305 ----
 thirdparty/antlr3/recognizers.py | 1485 ----------------
 thirdparty/antlr3/streams.py     | 1522 ----------------
 thirdparty/antlr3/tokens.py      |  418 -----
 thirdparty/antlr3/tree.py        | 2843 ------------------------------
 thirdparty/antlr3/treewizard.py  |  619 -------
 15 files changed, 1 insertion(+), 9428 deletions(-)
 delete mode 100644 thirdparty/antlr3/__init__.py
 delete mode 100644 thirdparty/antlr3/compat.py
 delete mode 100644 thirdparty/antlr3/constants.py
 delete mode 100644 thirdparty/antlr3/debug.py
 delete mode 100644 thirdparty/antlr3/dfa.py
 delete mode 100644 thirdparty/antlr3/dottreegen.py
 delete mode 100644 thirdparty/antlr3/exceptions.py
 delete mode 100644 thirdparty/antlr3/extras.py
 delete mode 100644 thirdparty/antlr3/main.py
 delete mode 100644 thirdparty/antlr3/recognizers.py
 delete mode 100644 thirdparty/antlr3/streams.py
 delete mode 100644 thirdparty/antlr3/tokens.py
 delete mode 100644 thirdparty/antlr3/tree.py
 delete mode 100644 thirdparty/antlr3/treewizard.py

diff --git a/antlr3 b/antlr3
index fd210d6e4..6352825c6 120000
--- a/antlr3
+++ b/antlr3
@@ -1 +1 @@
-thirdparty/antlr3
\ No newline at end of file
+thirdparty/antlr3-antlr-3.5/runtime/Python/antlr3/
\ No newline at end of file
diff --git a/thirdparty/antlr3/__init__.py b/thirdparty/antlr3/__init__.py
deleted file mode 100644
index 58c2a52a3..000000000
--- a/thirdparty/antlr3/__init__.py
+++ /dev/null
@@ -1,159 +0,0 @@
-""" @package antlr3
-@brief ANTLR3 runtime package
-
-This module contains all support classes which are needed to use recognizers
-generated by ANTLR3.
-
-@mainpage
-
-\note Please be warned that the line numbers in the API documentation do not
-match the real locations in the source code of the package. This is an
-unintended artifact of doxygen, which I could only convince to use the
-correct module names by concatenating all files from the package into a single
-module file...
-
-Here is a little overview of the most commonly used classes provided by
-this runtime:
-
-@section recognizers Recognizers
-
-These recognizers are base classes for the code which is generated by ANTLR3.
-
-- BaseRecognizer: Base class with common recognizer functionality.
-- Lexer: Base class for lexers.
-- Parser: Base class for parsers.
-- tree.TreeParser: Base class for %tree parsers.
-
-@section streams Streams
-
-Each recognizer pulls its input from one of the stream classes below. Streams
-handle stuff like buffering, look-ahead and seeking.
-
-A character stream is usually the first element in the pipeline of a typical
-ANTLR3 application. It is used as the input for a Lexer.
-
-- ANTLRStringStream: Reads from a string object. The input should be a unicode
-  object, or ANTLR3 will have trouble decoding non-ascii data.
-- ANTLRFileStream: Opens a file and reads the contents, with optional
-  character decoding.
-- ANTLRInputStream: Reads the data from a file-like object, with optional
-  character decoding.
-
-A Parser needs a TokenStream as input (which in turn is usually fed by a
-Lexer):
-
-- CommonTokenStream: A basic and most commonly used TokenStream
-  implementation.
-- TokenRewriteStream: A modification of CommonTokenStream that allows the
-  stream to be altered (by the Parser). See the 'tweak' example for a use
-  case.
-
-And tree.TreeParser finally fetches its input from a tree.TreeNodeStream:
-
-- tree.CommonTreeNodeStream: A basic and most commonly used
-  tree.TreeNodeStream implementation.
-
-
-@section tokenstrees Tokens and Trees
-
-A Lexer emits Token objects which are usually buffered by a TokenStream. A
-Parser can build a Tree, if the output=AST option has been set in the grammar.
-
-The runtime provides these Token implementations:
-
-- CommonToken: A basic and most commonly used Token implementation.
-- ClassicToken: A Token object as used in ANTLR 2.x, used for %tree
-  construction.
-
-Tree objects are wrappers for Token objects.
-
-- tree.CommonTree: A basic and most commonly used Tree implementation.
-
-A tree.TreeAdaptor is used by the parser to create tree.Tree objects for the
-input Token objects.
-
-- tree.CommonTreeAdaptor: A basic and most commonly used tree.TreeAdaptor
-  implementation.
-
-
-@section Exceptions
-
-RecognitionExceptions are generated when a recognizer encounters incorrect
-or unexpected input.
-
-- RecognitionException
-  - MismatchedRangeException
-  - MismatchedSetException
-    - MismatchedNotSetException
-    .
-  - MismatchedTokenException
-  - MismatchedTreeNodeException
-  - NoViableAltException
-  - EarlyExitException
-  - FailedPredicateException
-  .
-.
-
-A tree.RewriteCardinalityException is raised when the parser hits a
-cardinality mismatch during AST construction. Although this is basically a
-bug in your grammar, it can only be detected at runtime.
-
-- tree.RewriteCardinalityException
-  - tree.RewriteEarlyExitException
-  - tree.RewriteEmptyStreamException
-  .
-.
-
-"""
-
-# tree.RewriteRuleElementStream
-# tree.RewriteRuleSubtreeStream
-# tree.RewriteRuleTokenStream
-# CharStream
-# DFA
-# TokenSource
-
-# [The "BSD licence"]
-# Copyright (c) 2005-2008 Terence Parr
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 1. Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-# 3. The name of the author may not be used to endorse or promote products
-#    derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
-# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os -import sys - -__version__ = '3.4' - -# This runtime is compatible with generated parsers using the following -# API versions. 'HEAD' is only used by unittests. -compatible_api_versions = ['HEAD', 1] - -top_dir = os.path.normpath(os.path.join(os.path.abspath(__file__), - os.pardir)) -sys.path.append(top_dir) - -from antlr3.constants import * -from antlr3.dfa import * -from antlr3.exceptions import * -from antlr3.recognizers import * -from antlr3.streams import * -from antlr3.tokens import * diff --git a/thirdparty/antlr3/compat.py b/thirdparty/antlr3/compat.py deleted file mode 100644 index b29afcaae..000000000 --- a/thirdparty/antlr3/compat.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Compatibility stuff""" - -# begin[licence] -# -# [The "BSD licence"] -# Copyright (c) 2005-2008 Terence Parr -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# end[licence] - -try: - set = set - frozenset = frozenset -except NameError: - from sets import Set as set, ImmutableSet as frozenset - - -try: - reversed = reversed -except NameError: - def reversed(l): - l = l[:] - l.reverse() - return l - - diff --git a/thirdparty/antlr3/constants.py b/thirdparty/antlr3/constants.py deleted file mode 100644 index bf4a47a02..000000000 --- a/thirdparty/antlr3/constants.py +++ /dev/null @@ -1,57 +0,0 @@ -"""ANTLR3 runtime package""" - -# begin[licence] -# -# [The "BSD licence"] -# Copyright (c) 2005-2008 Terence Parr -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# end[licence] - -EOF = -1 - -## All tokens go to the parser (unless skip() is called in that rule) -# on a particular "channel". The parser tunes to a particular channel -# so that whitespace etc... can go to the parser on a "hidden" channel. -DEFAULT_CHANNEL = 0 - -## Anything on different channel than DEFAULT_CHANNEL is not parsed -# by parser. -HIDDEN_CHANNEL = 99 - -# Predefined token types -EOR_TOKEN_TYPE = 1 - -## -# imaginary tree navigation type; traverse "get child" link -DOWN = 2 -## -#imaginary tree navigation type; finish with a child list -UP = 3 - -MIN_TOKEN_TYPE = UP+1 - -INVALID_TOKEN_TYPE = 0 - diff --git a/thirdparty/antlr3/debug.py b/thirdparty/antlr3/debug.py deleted file mode 100644 index 5a578411c..000000000 --- a/thirdparty/antlr3/debug.py +++ /dev/null @@ -1,1137 +0,0 @@ -# begin[licence] -# -# [The "BSD licence"] -# Copyright (c) 2005-2009 Terence Parr -# All rights reserved. - -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. - -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
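For readers unfamiliar with the channel mechanism that constants.py above sets
up: a lexer routes skippable tokens (whitespace, comments) to HIDDEN_CHANNEL,
and the parser, which tunes only to DEFAULT_CHANNEL, never sees them. A
minimal self-contained sketch of that filtering idea; the Tok class is
invented here for illustration (the runtime's real token type is CommonToken):

DEFAULT_CHANNEL = 0
HIDDEN_CHANNEL = 99

class Tok(object):
    # Stand-in for antlr3.CommonToken, reduced to the two relevant fields.
    def __init__(self, text, channel=DEFAULT_CHANNEL):
        self.text = text
        self.channel = channel

stream = [Tok("x"), Tok(" ", HIDDEN_CHANNEL), Tok("="),
          Tok(" ", HIDDEN_CHANNEL), Tok("1")]

# A token stream feeds the parser only the on-channel tokens:
visible = [t.text for t in stream if t.channel == DEFAULT_CHANNEL]
assert visible == ["x", "=", "1"]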
-# -# end[licence] - -import socket -from antlr3 import Parser, TokenStream, RecognitionException, Token -from antlr3.tree import CommonTreeAdaptor, TreeAdaptor, Tree - -class DebugParser(Parser): - def __init__(self, stream, state=None, dbg=None, *args, **kwargs): - # wrap token stream in DebugTokenStream (unless user already did so). - if not isinstance(stream, DebugTokenStream): - stream = DebugTokenStream(stream, dbg) - - super(DebugParser, self).__init__(stream, state, *args, **kwargs) - - # Who to notify when events in the parser occur. - self._dbg = None - - self.setDebugListener(dbg) - - - def setDebugListener(self, dbg): - """Provide a new debug event listener for this parser. Notify the - input stream too that it should send events to this listener. - """ - - if hasattr(self.input, 'dbg'): - self.input.dbg = dbg - - self._dbg = dbg - - def getDebugListener(self): - return self._dbg - - dbg = property(getDebugListener, setDebugListener) - - - def beginResync(self): - self._dbg.beginResync() - - - def endResync(self): - self._dbg.endResync() - - - def beginBacktrack(self, level): - self._dbg.beginBacktrack(level) - - - def endBacktrack(self, level, successful): - self._dbg.endBacktrack(level,successful) - - - def reportError(self, exc): - Parser.reportError(self, exc) - - if isinstance(exc, RecognitionException): - self._dbg.recognitionException(exc) - - -class DebugTokenStream(TokenStream): - def __init__(self, input, dbg=None): - self.input = input - self.initialStreamState = True - # Track the last mark() call result value for use in rewind(). - self.lastMarker = None - - self._dbg = None - self.setDebugListener(dbg) - - # force TokenStream to get at least first valid token - # so we know if there are any hidden tokens first in the stream - self.input.LT(1) - - - def getDebugListener(self): - return self._dbg - - def setDebugListener(self, dbg): - self._dbg = dbg - - dbg = property(getDebugListener, setDebugListener) - - - def consume(self): - if self.initialStreamState: - self.consumeInitialHiddenTokens() - - a = self.input.index() - t = self.input.LT(1) - self.input.consume() - b = self.input.index() - self._dbg.consumeToken(t) - - if b > a+1: - # then we consumed more than one token; must be off channel tokens - for idx in range(a+1, b): - self._dbg.consumeHiddenToken(self.input.get(idx)); - - - def consumeInitialHiddenTokens(self): - """consume all initial off-channel tokens""" - - firstOnChannelTokenIndex = self.input.index() - for idx in range(firstOnChannelTokenIndex): - self._dbg.consumeHiddenToken(self.input.get(idx)) - - self.initialStreamState = False - - - def LT(self, i): - if self.initialStreamState: - self.consumeInitialHiddenTokens() - - t = self.input.LT(i) - self._dbg.LT(i, t) - return t - - - def LA(self, i): - if self.initialStreamState: - self.consumeInitialHiddenTokens() - - t = self.input.LT(i) - self._dbg.LT(i, t) - return t.type - - - def get(self, i): - return self.input.get(i) - - - def index(self): - return self.input.index() - - - def mark(self): - self.lastMarker = self.input.mark() - self._dbg.mark(self.lastMarker) - return self.lastMarker - - - def rewind(self, marker=None): - self._dbg.rewind(marker) - self.input.rewind(marker) - - - def release(self, marker): - pass - - - def seek(self, index): - # TODO: implement seek in dbg interface - # self._dbg.seek(index); - self.input.seek(index) - - - def size(self): - return self.input.size() - - - def getTokenSource(self): - return self.input.getTokenSource() - - - def getSourceName(self): - return 
self.getTokenSource().getSourceName()
-
-
-    def toString(self, start=None, stop=None):
-        return self.input.toString(start, stop)
-
-
-class DebugTreeAdaptor(TreeAdaptor):
-    """A TreeAdaptor proxy that fires debugging events to a DebugEventListener
-    delegate and uses the TreeAdaptor delegate to do the actual work. All
-    AST events are triggered by this adaptor; no code gen changes are needed
-    in generated rules. Debugging events are triggered *after* invoking
-    tree adaptor routines.
-
-    Trees created with actions in rewrite actions like "-> ^(ADD {foo} {bar})"
-    cannot be tracked as they might not use the adaptor to create foo, bar.
-    The debug listener has to deal with tree node IDs for which it did
-    not see a createNode event. A single node is sufficient even
-    if it represents a whole tree.
-    """
-
-    def __init__(self, dbg, adaptor):
-        self.dbg = dbg
-        self.adaptor = adaptor
-
-
-    def createWithPayload(self, payload):
-        if payload.getTokenIndex() < 0:
-            # could be token conjured up during error recovery
-            return self.createFromType(payload.getType(), payload.getText())
-
-        node = self.adaptor.createWithPayload(payload)
-        self.dbg.createNode(node, payload)
-        return node
-
-    def createFromToken(self, tokenType, fromToken, text=None):
-        node = self.adaptor.createFromToken(tokenType, fromToken, text)
-        self.dbg.createNode(node)
-        return node
-
-    def createFromType(self, tokenType, text):
-        node = self.adaptor.createFromType(tokenType, text)
-        self.dbg.createNode(node)
-        return node
-
-
-    def errorNode(self, input, start, stop, exc):
-        node = self.adaptor.errorNode(input, start, stop, exc)
-        if node is not None:
-            self.dbg.errorNode(node)
-
-        return node
-
-
-    def dupTree(self, tree):
-        t = self.adaptor.dupTree(tree)
-        # walk the tree and emit create and add child events
-        # to simulate what dupTree has done. dupTree does not call this debug
-        # adapter so I must simulate.
- self.simulateTreeConstruction(t) - return t - - - def simulateTreeConstruction(self, t): - """^(A B C): emit create A, create B, add child, ...""" - self.dbg.createNode(t) - for i in range(self.adaptor.getChildCount(t)): - child = self.adaptor.getChild(t, i) - self.simulateTreeConstruction(child) - self.dbg.addChild(t, child) - - - def dupNode(self, treeNode): - d = self.adaptor.dupNode(treeNode) - self.dbg.createNode(d) - return d - - - def nil(self): - node = self.adaptor.nil() - self.dbg.nilNode(node) - return node - - - def isNil(self, tree): - return self.adaptor.isNil(tree) - - - def addChild(self, t, child): - if isinstance(child, Token): - n = self.createWithPayload(child) - self.addChild(t, n) - - else: - if t is None or child is None: - return - - self.adaptor.addChild(t, child) - self.dbg.addChild(t, child) - - def becomeRoot(self, newRoot, oldRoot): - if isinstance(newRoot, Token): - n = self.createWithPayload(newRoot) - self.adaptor.becomeRoot(n, oldRoot) - else: - n = self.adaptor.becomeRoot(newRoot, oldRoot) - - self.dbg.becomeRoot(newRoot, oldRoot) - return n - - - def rulePostProcessing(self, root): - return self.adaptor.rulePostProcessing(root) - - - def getType(self, t): - return self.adaptor.getType(t) - - - def setType(self, t, type): - self.adaptor.setType(t, type) - - - def getText(self, t): - return self.adaptor.getText(t) - - - def setText(self, t, text): - self.adaptor.setText(t, text) - - - def getToken(self, t): - return self.adaptor.getToken(t) - - - def setTokenBoundaries(self, t, startToken, stopToken): - self.adaptor.setTokenBoundaries(t, startToken, stopToken) - if t is not None and startToken is not None and stopToken is not None: - self.dbg.setTokenBoundaries( - t, startToken.getTokenIndex(), - stopToken.getTokenIndex()) - - - def getTokenStartIndex(self, t): - return self.adaptor.getTokenStartIndex(t) - - - def getTokenStopIndex(self, t): - return self.adaptor.getTokenStopIndex(t) - - - def getChild(self, t, i): - return self.adaptor.getChild(t, i) - - - def setChild(self, t, i, child): - self.adaptor.setChild(t, i, child) - - - def deleteChild(self, t, i): - return self.adaptor.deleteChild(t, i) - - - def getChildCount(self, t): - return self.adaptor.getChildCount(t) - - - def getUniqueID(self, node): - return self.adaptor.getUniqueID(node) - - - def getParent(self, t): - return self.adaptor.getParent(t) - - - def getChildIndex(self, t): - return self.adaptor.getChildIndex(t) - - - def setParent(self, t, parent): - self.adaptor.setParent(t, parent) - - - def setChildIndex(self, t, index): - self.adaptor.setChildIndex(t, index) - - - def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): - self.adaptor.replaceChildren(parent, startChildIndex, stopChildIndex, t) - - - ## support - - def getDebugListener(self): - return dbg - - def setDebugListener(self, dbg): - self.dbg = dbg - - - def getTreeAdaptor(self): - return self.adaptor - - - -class DebugEventListener(object): - """All debugging events that a recognizer can trigger. - - I did not create a separate AST debugging interface as it would create - lots of extra classes and DebugParser has a dbg var defined, which makes - it hard to change to ASTDebugEventListener. I looked hard at this issue - and it is easier to understand as one monolithic event interface for all - possible events. Hopefully, adding ST debugging stuff won't be bad. Leave - for future. 4/26/2006. 
- """ - - # Moved to version 2 for v3.1: added grammar name to enter/exit Rule - PROTOCOL_VERSION = "2" - - def enterRule(self, grammarFileName, ruleName): - """The parser has just entered a rule. No decision has been made about - which alt is predicted. This is fired AFTER init actions have been - executed. Attributes are defined and available etc... - The grammarFileName allows composite grammars to jump around among - multiple grammar files. - """ - - pass - - - def enterAlt(self, alt): - """Because rules can have lots of alternatives, it is very useful to - know which alt you are entering. This is 1..n for n alts. - """ - pass - - - def exitRule(self, grammarFileName, ruleName): - """This is the last thing executed before leaving a rule. It is - executed even if an exception is thrown. This is triggered after - error reporting and recovery have occurred (unless the exception is - not caught in this rule). This implies an "exitAlt" event. - The grammarFileName allows composite grammars to jump around among - multiple grammar files. - """ - pass - - - def enterSubRule(self, decisionNumber): - """Track entry into any (...) subrule other EBNF construct""" - pass - - - def exitSubRule(self, decisionNumber): - pass - - - def enterDecision(self, decisionNumber, couldBacktrack): - """Every decision, fixed k or arbitrary, has an enter/exit event - so that a GUI can easily track what LT/consume events are - associated with prediction. You will see a single enter/exit - subrule but multiple enter/exit decision events, one for each - loop iteration. - """ - pass - - - def exitDecision(self, decisionNumber): - pass - - - def consumeToken(self, t): - """An input token was consumed; matched by any kind of element. - Trigger after the token was matched by things like match(), matchAny(). - """ - pass - - - def consumeHiddenToken(self, t): - """An off-channel input token was consumed. - Trigger after the token was matched by things like match(), matchAny(). - (unless of course the hidden token is first stuff in the input stream). - """ - pass - - - def LT(self, i, t): - """Somebody (anybody) looked ahead. Note that this actually gets - triggered by both LA and LT calls. The debugger will want to know - which Token object was examined. Like consumeToken, this indicates - what token was seen at that depth. A remote debugger cannot look - ahead into a file it doesn't have so LT events must pass the token - even if the info is redundant. - """ - pass - - - def mark(self, marker): - """The parser is going to look arbitrarily ahead; mark this location, - the token stream's marker is sent in case you need it. - """ - pass - - - def rewind(self, marker=None): - """After an arbitrarily long lookahead as with a cyclic DFA (or with - any backtrack), this informs the debugger that stream should be - rewound to the position associated with marker. - - """ - pass - - - def beginBacktrack(self, level): - pass - - - def endBacktrack(self, level, successful): - pass - - - def location(self, line, pos): - """To watch a parser move through the grammar, the parser needs to - inform the debugger what line/charPos it is passing in the grammar. - For now, this does not know how to switch from one grammar to the - other and back for island grammars etc... - - This should also allow breakpoints because the debugger can stop - the parser whenever it hits this line/pos. - """ - pass - - - def recognitionException(self, e): - """A recognition exception occurred such as NoViableAltException. 
I made - this a generic event so that I can alter the exception hierarchy later - without having to alter all the debug objects. - - Upon error, the stack of enter rule/subrule must be properly unwound. - If no viable alt occurs it is within an enter/exit decision, which - also must be rewound. Even the rewind for each mark must be unwound. - In the Java target this is pretty easy using try/finally, if a bit - ugly in the generated code. The rewind is generated in DFA.predict() - actually so no code needs to be generated for that. For languages - w/o this "finally" feature (C++?), the target implementor will have - to build an event stack or something. - - Across a socket for remote debugging, only the RecognitionException - data fields are transmitted. The token object or whatever that - caused the problem was the last object referenced by LT. The - immediately preceding LT event should hold the unexpected Token or - char. - - Here is a sample event trace for grammar: - - b : C ({;}A|B) // {;} is there to prevent A|B becoming a set - | D - ; - - The sequence for this rule (with no viable alt in the subrule) for - input 'c c' (there are 3 tokens) is: - - commence - LT(1) - enterRule b - location 7 1 - enter decision 3 - LT(1) - exit decision 3 - enterAlt1 - location 7 5 - LT(1) - consumeToken [c/<4>,1:0] - location 7 7 - enterSubRule 2 - enter decision 2 - LT(1) - LT(1) - recognitionException NoViableAltException 2 1 2 - exit decision 2 - exitSubRule 2 - beginResync - LT(1) - consumeToken [c/<4>,1:1] - LT(1) - endResync - LT(-1) - exitRule b - terminate - """ - pass - - - def beginResync(self): - """Indicates the recognizer is about to consume tokens to resynchronize - the parser. Any consume events from here until the recovered event - are not part of the parse--they are dead tokens. - """ - pass - - - def endResync(self): - """Indicates that the recognizer has finished consuming tokens in order - to resynchronize. There may be multiple beginResync/endResync pairs - before the recognizer comes out of errorRecovery mode (in which - multiple errors are suppressed). This will be useful - in a gui where you want to probably grey out tokens that are consumed - but not matched to anything in grammar. Anything between - a beginResync/endResync pair was tossed out by the parser. - """ - pass - - - def semanticPredicate(self, result, predicate): - """A semantic predicate was evaluate with this result and action text""" - pass - - - def commence(self): - """Announce that parsing has begun. Not technically useful except for - sending events over a socket. A GUI for example will launch a thread - to connect and communicate with a remote parser. The thread will want - to notify the GUI when a connection is made. ANTLR parsers - trigger this upon entry to the first rule (the ruleLevel is used to - figure this out). - """ - pass - - - def terminate(self): - """Parsing is over; successfully or not. Mostly useful for telling - remote debugging listeners that it's time to quit. When the rule - invocation level goes to zero at the end of a rule, we are done - parsing. - """ - pass - - - ## T r e e P a r s i n g - - def consumeNode(self, t): - """Input for a tree parser is an AST, but we know nothing for sure - about a node except its type and text (obtained from the adaptor). - This is the analog of the consumeToken method. Again, the ID is - the hashCode usually of the node so it only works if hashCode is - not implemented. 
If the type is UP or DOWN, then - the ID is not really meaningful as it's fixed--there is - just one UP node and one DOWN navigation node. - """ - pass - - - def LT(self, i, t): - """The tree parser lookedahead. If the type is UP or DOWN, - then the ID is not really meaningful as it's fixed--there is - just one UP node and one DOWN navigation node. - """ - pass - - - - ## A S T E v e n t s - - def nilNode(self, t): - """A nil was created (even nil nodes have a unique ID... - they are not "null" per se). As of 4/28/2006, this - seems to be uniquely triggered when starting a new subtree - such as when entering a subrule in automatic mode and when - building a tree in rewrite mode. - - If you are receiving this event over a socket via - RemoteDebugEventSocketListener then only t.ID is set. - """ - pass - - - def errorNode(self, t): - """Upon syntax error, recognizers bracket the error with an error node - if they are building ASTs. - """ - pass - - - def createNode(self, node, token=None): - """Announce a new node built from token elements such as type etc... - - If you are receiving this event over a socket via - RemoteDebugEventSocketListener then only t.ID, type, text are - set. - """ - pass - - - def becomeRoot(self, newRoot, oldRoot): - """Make a node the new root of an existing root. - - Note: the newRootID parameter is possibly different - than the TreeAdaptor.becomeRoot() newRoot parameter. - In our case, it will always be the result of calling - TreeAdaptor.becomeRoot() and not root_n or whatever. - - The listener should assume that this event occurs - only when the current subrule (or rule) subtree is - being reset to newRootID. - - If you are receiving this event over a socket via - RemoteDebugEventSocketListener then only IDs are set. - - @see antlr3.tree.TreeAdaptor.becomeRoot() - """ - pass - - - def addChild(self, root, child): - """Make childID a child of rootID. - - If you are receiving this event over a socket via - RemoteDebugEventSocketListener then only IDs are set. - - @see antlr3.tree.TreeAdaptor.addChild() - """ - pass - - - def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex): - """Set the token start/stop token index for a subtree root or node. - - If you are receiving this event over a socket via - RemoteDebugEventSocketListener then only t.ID is set. - """ - pass - - -class BlankDebugEventListener(DebugEventListener): - """A blank listener that does nothing; useful for real classes so - they don't have to have lots of blank methods and are less - sensitive to updates to debug interface. - - Note: this class is identical to DebugEventListener and exists purely - for compatibility with Java. - """ - pass - - -class TraceDebugEventListener(DebugEventListener): - """A listener that simply records text representations of the events. - - Useful for debugging the debugging facility ;) - - Subclasses can override the record() method (which defaults to printing to - stdout) to record the events in a different way. 
- """ - - def __init__(self, adaptor=None): - super(TraceDebugEventListener, self).__init__() - - if adaptor is None: - adaptor = CommonTreeAdaptor() - self.adaptor = adaptor - - def record(self, event): - sys.stdout.write(event + '\n') - - def enterRule(self, grammarFileName, ruleName): - self.record("enterRule "+ruleName) - - def exitRule(self, grammarFileName, ruleName): - self.record("exitRule "+ruleName) - - def enterSubRule(self, decisionNumber): - self.record("enterSubRule") - - def exitSubRule(self, decisionNumber): - self.record("exitSubRule") - - def location(self, line, pos): - self.record("location %s:%s" % (line, pos)) - - ## Tree parsing stuff - - def consumeNode(self, t): - self.record("consumeNode %s %s %s" % ( - self.adaptor.getUniqueID(t), - self.adaptor.getText(t), - self.adaptor.getType(t))) - - def LT(self, i, t): - self.record("LT %s %s %s %s" % ( - i, - self.adaptor.getUniqueID(t), - self.adaptor.getText(t), - self.adaptor.getType(t))) - - - ## AST stuff - def nilNode(self, t): - self.record("nilNode %s" % self.adaptor.getUniqueID(t)) - - def createNode(self, t, token=None): - if token is None: - self.record("create %s: %s, %s" % ( - self.adaptor.getUniqueID(t), - self.adaptor.getText(t), - self.adaptor.getType(t))) - - else: - self.record("create %s: %s" % ( - self.adaptor.getUniqueID(t), - token.getTokenIndex())) - - def becomeRoot(self, newRoot, oldRoot): - self.record("becomeRoot %s, %s" % ( - self.adaptor.getUniqueID(newRoot), - self.adaptor.getUniqueID(oldRoot))) - - def addChild(self, root, child): - self.record("addChild %s, %s" % ( - self.adaptor.getUniqueID(root), - self.adaptor.getUniqueID(child))) - - def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex): - self.record("setTokenBoundaries %s, %s, %s" % ( - self.adaptor.getUniqueID(t), - tokenStartIndex, tokenStopIndex)) - - -class RecordDebugEventListener(TraceDebugEventListener): - """A listener that records events as strings in an array.""" - - def __init__(self, adaptor=None): - super(RecordDebugEventListener, self).__init__(adaptor) - - self.events = [] - - def record(self, event): - self.events.append(event) - - -class DebugEventSocketProxy(DebugEventListener): - """A proxy debug event listener that forwards events over a socket to - a debugger (or any other listener) using a simple text-based protocol; - one event per line. ANTLRWorks listens on server socket with a - RemoteDebugEventSocketListener instance. These two objects must therefore - be kept in sync. New events must be handled on both sides of socket. - """ - - DEFAULT_DEBUGGER_PORT = 49100 - - def __init__(self, recognizer, adaptor=None, port=None, - debug=None): - super(DebugEventSocketProxy, self).__init__() - - self.grammarFileName = recognizer.getGrammarFileName() - - # Almost certainly the recognizer will have adaptor set, but - # we don't know how to cast it (Parser or TreeParser) to get - # the adaptor field. Must be set with a constructor. 
:( - self.adaptor = adaptor - - self.port = port or self.DEFAULT_DEBUGGER_PORT - - self.debug = debug - - self.socket = None - self.connection = None - self.input = None - self.output = None - - - def log(self, msg): - if self.debug is not None: - self.debug.write(msg + '\n') - - - def handshake(self): - if self.socket is None: - # create listening socket - self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - self.socket.bind(('', self.port)) - self.socket.listen(1) - self.log("Waiting for incoming connection on port %d" % self.port) - - # wait for an incoming connection - self.connection, addr = self.socket.accept() - self.log("Accepted connection from %s:%d" % addr) - - self.connection.setblocking(1) - self.connection.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1) - - # FIXME(pink): wrap into utf8 encoding stream - self.output = self.connection.makefile('w', 0) - self.input = self.connection.makefile('r', 0) - - self.write("ANTLR %s" % self.PROTOCOL_VERSION) - self.write("grammar \"%s" % self.grammarFileName) - self.ack() - - - def write(self, msg): - self.log("> %s" % msg) - self.output.write("%s\n" % msg) - self.output.flush() - - - def ack(self): - t = self.input.readline() - self.log("< %s" % t.rstrip()) - - - def transmit(self, event): - self.write(event); - self.ack(); - - - def commence(self): - # don't bother sending event; listener will trigger upon connection - pass - - - def terminate(self): - self.transmit("terminate") - self.output.close() - self.input.close() - self.connection.close() - self.socket.close() - - - def enterRule(self, grammarFileName, ruleName): - self.transmit("enterRule\t%s\t%s" % (grammarFileName, ruleName)) - - - def enterAlt(self, alt): - self.transmit("enterAlt\t%d" % alt) - - - def exitRule(self, grammarFileName, ruleName): - self.transmit("exitRule\t%s\t%s" % (grammarFileName, ruleName)) - - - def enterSubRule(self, decisionNumber): - self.transmit("enterSubRule\t%d" % decisionNumber) - - - def exitSubRule(self, decisionNumber): - self.transmit("exitSubRule\t%d" % decisionNumber) - - - def enterDecision(self, decisionNumber, couldBacktrack): - self.transmit( - "enterDecision\t%d\t%d" % (decisionNumber, couldBacktrack)) - - - def exitDecision(self, decisionNumber): - self.transmit("exitDecision\t%d" % decisionNumber) - - - def consumeToken(self, t): - self.transmit("consumeToken\t%s" % self.serializeToken(t)) - - - def consumeHiddenToken(self, t): - self.transmit("consumeHiddenToken\t%s" % self.serializeToken(t)) - - - def LT(self, i, o): - if isinstance(o, Tree): - return self.LT_tree(i, o) - return self.LT_token(i, o) - - - def LT_token(self, i, t): - if t is not None: - self.transmit("LT\t%d\t%s" % (i, self.serializeToken(t))) - - - def mark(self, i): - self.transmit("mark\t%d" % i) - - - def rewind(self, i=None): - if i is not None: - self.transmit("rewind\t%d" % i) - else: - self.transmit("rewind") - - - def beginBacktrack(self, level): - self.transmit("beginBacktrack\t%d" % level) - - - def endBacktrack(self, level, successful): - self.transmit("endBacktrack\t%d\t%s" % ( - level, ['0', '1'][bool(successful)])) - - - def location(self, line, pos): - self.transmit("location\t%d\t%d" % (line, pos)) - - - def recognitionException(self, exc): - self.transmit('\t'.join([ - "exception", - exc.__class__.__name__, - str(int(exc.index)), - str(int(exc.line)), - str(int(exc.charPositionInLine))])) - - - def beginResync(self): - self.transmit("beginResync") - - - def 
endResync(self): - self.transmit("endResync") - - - def semanticPredicate(self, result, predicate): - self.transmit('\t'.join([ - "semanticPredicate", - str(int(result)), - self.escapeNewlines(predicate)])) - - ## A S T P a r s i n g E v e n t s - - def consumeNode(self, t): - FIXME(31) -# StringBuffer buf = new StringBuffer(50); -# buf.append("consumeNode"); -# serializeNode(buf, t); -# transmit(buf.toString()); - - - def LT_tree(self, i, t): - FIXME(34) -# int ID = adaptor.getUniqueID(t); -# String text = adaptor.getText(t); -# int type = adaptor.getType(t); -# StringBuffer buf = new StringBuffer(50); -# buf.append("LN\t"); // lookahead node; distinguish from LT in protocol -# buf.append(i); -# serializeNode(buf, t); -# transmit(buf.toString()); - - - def serializeNode(self, buf, t): - FIXME(33) -# int ID = adaptor.getUniqueID(t); -# String text = adaptor.getText(t); -# int type = adaptor.getType(t); -# buf.append("\t"); -# buf.append(ID); -# buf.append("\t"); -# buf.append(type); -# Token token = adaptor.getToken(t); -# int line = -1; -# int pos = -1; -# if ( token!=null ) { -# line = token.getLine(); -# pos = token.getCharPositionInLine(); -# } -# buf.append("\t"); -# buf.append(line); -# buf.append("\t"); -# buf.append(pos); -# int tokenIndex = adaptor.getTokenStartIndex(t); -# buf.append("\t"); -# buf.append(tokenIndex); -# serializeText(buf, text); - - - ## A S T E v e n t s - - def nilNode(self, t): - self.transmit("nilNode\t%d" % self.adaptor.getUniqueID(t)) - - - def errorNode(self, t): - self.transmit("errorNode\t%d\t%d\t\"%s" % ( - self.adaptor.getUniqueID(t), - Token.INVALID_TOKEN_TYPE, - self.escapeNewlines(t.toString()))) - - - - def createNode(self, node, token=None): - if token is not None: - self.transmit("createNode\t%d\t%d" % ( - self.adaptor.getUniqueID(node), - token.getTokenIndex())) - - else: - self.transmit("createNodeFromTokenElements\t%d\t%d\t\"%s" % ( - self.adaptor.getUniqueID(node), - self.adaptor.getType(node), - self.adaptor.getText(node))) - - - def becomeRoot(self, newRoot, oldRoot): - self.transmit("becomeRoot\t%d\t%d" % ( - self.adaptor.getUniqueID(newRoot), - self.adaptor.getUniqueID(oldRoot))) - - - def addChild(self, root, child): - self.transmit("addChild\t%d\t%d" % ( - self.adaptor.getUniqueID(root), - self.adaptor.getUniqueID(child))) - - - def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex): - self.transmit("setTokenBoundaries\t%d\t%d\t%d" % ( - self.adaptor.getUniqueID(t), - tokenStartIndex, tokenStopIndex)) - - - - ## support - - def setTreeAdaptor(self, adaptor): - self.adaptor = adaptor - - def getTreeAdaptor(self): - return self.adaptor - - - def serializeToken(self, t): - buf = [str(int(t.getTokenIndex())), - str(int(t.getType())), - str(int(t.getChannel())), - str(int(t.getLine() or 0)), - str(int(t.getCharPositionInLine() or 0)), - '\"' + self.escapeNewlines(t.getText())] - return '\t'.join(buf) - - - def escapeNewlines(self, txt): - if txt is None: - return '' - - txt = txt.replace("%","%25") # escape all escape char ;) - txt = txt.replace("\n","%0A") # escape \n - txt = txt.replace("\r","%0D") # escape \r - return txt diff --git a/thirdparty/antlr3/dfa.py b/thirdparty/antlr3/dfa.py deleted file mode 100644 index ff93761ad..000000000 --- a/thirdparty/antlr3/dfa.py +++ /dev/null @@ -1,213 +0,0 @@ -"""ANTLR3 runtime package""" - -# begin[licence] -# -# [The "BSD licence"] -# Copyright (c) 2005-2008 Terence Parr -# All rights reserved. 
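The text protocol emitted by DebugEventSocketProxy above escapes '%' before
the newline characters, so escape sequences appearing in a payload cannot be
confused with the one-event-per-line framing. A standalone sketch of the
round trip; the unescape helper is hypothetical, written here only to
demonstrate the required decode order (CR/LF first, percent last):

def escape_newlines(txt):
    # Mirrors DebugEventSocketProxy.escapeNewlines: '%' must be escaped
    # first, otherwise the "%0A" produced from a newline would be re-escaped.
    if txt is None:
        return ''
    return txt.replace("%", "%25").replace("\n", "%0A").replace("\r", "%0D")

def unescape_newlines(txt):
    # Hypothetical inverse (not part of the runtime): decode in the
    # opposite order, percent last.
    return txt.replace("%0D", "\r").replace("%0A", "\n").replace("%25", "%")

assert unescape_newlines(escape_newlines("a%0A\nb")) == "a%0A\nb"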
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# end[licensc] - -from antlr3.constants import EOF -from antlr3.exceptions import NoViableAltException, BacktrackingFailed - - -class DFA(object): - """@brief A DFA implemented as a set of transition tables. - - Any state that has a semantic predicate edge is special; those states - are generated with if-then-else structures in a specialStateTransition() - which is generated by cyclicDFA template. - - """ - - def __init__( - self, - recognizer, decisionNumber, - eot, eof, min, max, accept, special, transition - ): - ## Which recognizer encloses this DFA? Needed to check backtracking - self.recognizer = recognizer - - self.decisionNumber = decisionNumber - self.eot = eot - self.eof = eof - self.min = min - self.max = max - self.accept = accept - self.special = special - self.transition = transition - - - def predict(self, input): - """ - From the input stream, predict what alternative will succeed - using this DFA (representing the covering regular approximation - to the underlying CFL). Return an alternative number 1..n. Throw - an exception upon error. - """ - mark = input.mark() - s = 0 # we always start at s0 - try: - for _ in xrange(50000): - #print "***Current state = %d" % s - - specialState = self.special[s] - if specialState >= 0: - #print "is special" - s = self.specialStateTransition(specialState, input) - if s == -1: - self.noViableAlt(s, input) - return 0 - input.consume() - continue - - if self.accept[s] >= 1: - #print "accept state for alt %d" % self.accept[s] - return self.accept[s] - - # look for a normal char transition - c = input.LA(1) - - #print "LA = %d (%r)" % (c, unichr(c) if c >= 0 else 'EOF') - #print "range = %d..%d" % (self.min[s], self.max[s]) - - if c >= self.min[s] and c <= self.max[s]: - # move to next state - snext = self.transition[s][c-self.min[s]] - #print "in range, next state = %d" % snext - - if snext < 0: - #print "not a normal transition" - # was in range but not a normal transition - # must check EOT, which is like the else clause. - # eot[s]>=0 indicates that an EOT edge goes to another - # state. - if self.eot[s] >= 0: # EOT Transition to accept state? 
- #print "EOT trans to accept state %d" % self.eot[s] - - s = self.eot[s] - input.consume() - # TODO: I had this as return accept[eot[s]] - # which assumed here that the EOT edge always - # went to an accept...faster to do this, but - # what about predicated edges coming from EOT - # target? - continue - - #print "no viable alt" - self.noViableAlt(s, input) - return 0 - - s = snext - input.consume() - continue - - if self.eot[s] >= 0: - #print "EOT to %d" % self.eot[s] - - s = self.eot[s] - input.consume() - continue - - # EOF Transition to accept state? - if c == EOF and self.eof[s] >= 0: - #print "EOF Transition to accept state %d" \ - # % self.accept[self.eof[s]] - return self.accept[self.eof[s]] - - # not in range and not EOF/EOT, must be invalid symbol - self.noViableAlt(s, input) - return 0 - - else: - raise RuntimeError("DFA bang!") - - finally: - input.rewind(mark) - - - def noViableAlt(self, s, input): - if self.recognizer._state.backtracking > 0: - raise BacktrackingFailed - - nvae = NoViableAltException( - self.getDescription(), - self.decisionNumber, - s, - input - ) - - self.error(nvae) - raise nvae - - - def error(self, nvae): - """A hook for debugging interface""" - pass - - - def specialStateTransition(self, s, input): - return -1 - - - def getDescription(self): - return "n/a" - - -## def specialTransition(self, state, symbol): -## return 0 - - - def unpack(cls, string): - """@brief Unpack the runlength encoded table data. - - Terence implemented packed table initializers, because Java has a - size restriction on .class files and the lookup tables can grow - pretty large. The generated JavaLexer.java of the Java.g example - would be about 15MB with uncompressed array initializers. - - Python does not have any size restrictions, but the compilation of - such large source files seems to be pretty memory hungry. The memory - consumption of the python process grew to >1.5GB when importing a - 15MB lexer, eating all my swap space and I was to impacient to see, - if it could finish at all. With packed initializers that are unpacked - at import time of the lexer module, everything works like a charm. - - """ - - ret = [] - for i in range(len(string) / 2): - (n, v) = ord(string[i*2]), ord(string[i*2+1]) - - # Is there a bitwise operation to do this? - if v == 0xFFFF: - v = -1 - - ret += [v] * n - - return ret - - unpack = classmethod(unpack) diff --git a/thirdparty/antlr3/dottreegen.py b/thirdparty/antlr3/dottreegen.py deleted file mode 100644 index 41415b134..000000000 --- a/thirdparty/antlr3/dottreegen.py +++ /dev/null @@ -1,210 +0,0 @@ -""" @package antlr3.dottreegenerator -@brief ANTLR3 runtime package, tree module - -This module contains all support classes for AST construction and tree parsers. - -""" - -# begin[licence] -# -# [The "BSD licence"] -# Copyright (c) 2005-2008 Terence Parr -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. 
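DFA.unpack() above reverses a simple run-length encoding: the transition
tables are packed as (count, value) pairs of 16-bit characters, with 0xFFFF
standing in for -1. A sketch of the same decoding over explicit tuples
instead of a packed string, just to make the convention concrete:

def unpack_pairs(pairs):
    # Each (n, v) pair expands to n copies of v; 0xFFFF is the packed
    # representation of -1 ("no transition"), since the table characters
    # are unsigned 16-bit values.
    ret = []
    for n, v in pairs:
        if v == 0xFFFF:
            v = -1
        ret.extend([v] * n)
    return ret

# Three transitions to state 7, then two "no transition" entries:
assert unpack_pairs([(3, 7), (2, 0xFFFF)]) == [7, 7, 7, -1, -1]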
-# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# end[licence] - -# lot's of docstrings are missing, don't complain for now... -# pylint: disable-msg=C0111 - -from antlr3.tree import CommonTreeAdaptor -import stringtemplate3 - -class DOTTreeGenerator(object): - """ - A utility class to generate DOT diagrams (graphviz) from - arbitrary trees. You can pass in your own templates and - can pass in any kind of tree or use Tree interface method. - """ - - _treeST = stringtemplate3.StringTemplate( - template=( - "digraph {\n" + - " ordering=out;\n" + - " ranksep=.4;\n" + - " node [shape=plaintext, fixedsize=true, fontsize=11, fontname=\"Courier\",\n" + - " width=.25, height=.25];\n" + - " edge [arrowsize=.5]\n" + - " $nodes$\n" + - " $edges$\n" + - "}\n") - ) - - _nodeST = stringtemplate3.StringTemplate( - template="$name$ [label=\"$text$\"];\n" - ) - - _edgeST = stringtemplate3.StringTemplate( - template="$parent$ -> $child$ // \"$parentText$\" -> \"$childText$\"\n" - ) - - def __init__(self): - ## Track node to number mapping so we can get proper node name back - self.nodeToNumberMap = {} - - ## Track node number so we can get unique node names - self.nodeNumber = 0 - - - def toDOT(self, tree, adaptor=None, treeST=_treeST, edgeST=_edgeST): - if adaptor is None: - adaptor = CommonTreeAdaptor() - - treeST = treeST.getInstanceOf() - - self.nodeNumber = 0 - self.toDOTDefineNodes(tree, adaptor, treeST) - - self.nodeNumber = 0 - self.toDOTDefineEdges(tree, adaptor, treeST, edgeST) - return treeST - - - def toDOTDefineNodes(self, tree, adaptor, treeST, knownNodes=None): - if knownNodes is None: - knownNodes = set() - - if tree is None: - return - - n = adaptor.getChildCount(tree) - if n == 0: - # must have already dumped as child from previous - # invocation; do nothing - return - - # define parent node - number = self.getNodeNumber(tree) - if number not in knownNodes: - parentNodeST = self.getNodeST(adaptor, tree) - treeST.setAttribute("nodes", parentNodeST) - knownNodes.add(number) - - # for each child, do a " [label=text]" node def - for i in range(n): - child = adaptor.getChild(tree, i) - - number = self.getNodeNumber(child) - if number not in knownNodes: - nodeST = self.getNodeST(adaptor, child) - treeST.setAttribute("nodes", nodeST) - knownNodes.add(number) - - self.toDOTDefineNodes(child, adaptor, treeST, knownNodes) - - - def toDOTDefineEdges(self, tree, adaptor, treeST, edgeST): - if tree is None: - return - - n = adaptor.getChildCount(tree) - if n == 0: - # must have already dumped as child from previous - # invocation; do nothing - return - - parentName = "n%d" % self.getNodeNumber(tree) - - # for each child, do a parent -> child edge using unique node names - parentText = adaptor.getText(tree) - for i in range(n): - child = adaptor.getChild(tree, i) - childText = adaptor.getText(child) - childName = "n%d" % 
self.getNodeNumber(child) - edgeST = edgeST.getInstanceOf() - edgeST.setAttribute("parent", parentName) - edgeST.setAttribute("child", childName) - edgeST.setAttribute("parentText", parentText) - edgeST.setAttribute("childText", childText) - treeST.setAttribute("edges", edgeST) - self.toDOTDefineEdges(child, adaptor, treeST, edgeST) - - - def getNodeST(self, adaptor, t): - text = adaptor.getText(t) - nodeST = self._nodeST.getInstanceOf() - uniqueName = "n%d" % self.getNodeNumber(t) - nodeST.setAttribute("name", uniqueName) - if text is not None: - text = text.replace('"', r'\"') - nodeST.setAttribute("text", text) - return nodeST - - - def getNodeNumber(self, t): - try: - return self.nodeToNumberMap[t] - except KeyError: - self.nodeToNumberMap[t] = self.nodeNumber - self.nodeNumber += 1 - return self.nodeNumber - 1 - - -def toDOT(tree, adaptor=None, treeST=DOTTreeGenerator._treeST, edgeST=DOTTreeGenerator._edgeST): - """ - Generate DOT (graphviz) for a whole tree not just a node. - For example, 3+4*5 should generate: - - digraph { - node [shape=plaintext, fixedsize=true, fontsize=11, fontname="Courier", - width=.4, height=.2]; - edge [arrowsize=.7] - "+"->3 - "+"->"*" - "*"->4 - "*"->5 - } - - Return the ST not a string in case people want to alter. - - Takes a Tree interface object. - - Example of invokation: - - import antlr3 - import antlr3.extras - - input = antlr3.ANTLRInputStream(sys.stdin) - lex = TLexer(input) - tokens = antlr3.CommonTokenStream(lex) - parser = TParser(tokens) - tree = parser.e().tree - print tree.toStringTree() - st = antlr3.extras.toDOT(t) - print st - - """ - - gen = DOTTreeGenerator() - return gen.toDOT(tree, adaptor, treeST, edgeST) diff --git a/thirdparty/antlr3/exceptions.py b/thirdparty/antlr3/exceptions.py deleted file mode 100644 index 97b10743b..000000000 --- a/thirdparty/antlr3/exceptions.py +++ /dev/null @@ -1,364 +0,0 @@ -"""ANTLR3 exception hierarchy""" - -# begin[licence] -# -# [The "BSD licence"] -# Copyright (c) 2005-2008 Terence Parr -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
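The toDOT() helper above is easiest to see with a tiny hand-built tree. A
usage sketch, assuming the antlr3 runtime and stringtemplate3 are installed;
the token types 1 and 2 are arbitrary values chosen for illustration:

import antlr3.dottreegen
from antlr3.tree import CommonTreeAdaptor

adaptor = CommonTreeAdaptor()
root = adaptor.createFromType(1, "+")
adaptor.addChild(root, adaptor.createFromType(2, "3"))
adaptor.addChild(root, adaptor.createFromType(2, "4"))

# toDOT() returns the StringTemplate rather than a string, so the
# template can still be altered before rendering.
st = antlr3.dottreegen.toDOT(root, adaptor)
print(st.toString())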
-# -# end[licence] - -from antlr3.constants import INVALID_TOKEN_TYPE - - -class BacktrackingFailed(Exception): - """@brief Raised to signal failed backtrack attempt""" - - pass - - -class RecognitionException(Exception): - """@brief The root of the ANTLR exception hierarchy. - - To avoid English-only error messages and to generally make things - as flexible as possible, these exceptions are not created with strings, - but rather the information necessary to generate an error. Then - the various reporting methods in Parser and Lexer can be overridden - to generate a localized error message. For example, MismatchedToken - exceptions are built with the expected token type. - So, don't expect getMessage() to return anything. - - Note that as of Java 1.4, you can access the stack trace, which means - that you can compute the complete trace of rules from the start symbol. - This gives you considerable context information with which to generate - useful error messages. - - ANTLR generates code that throws exceptions upon recognition error and - also generates code to catch these exceptions in each rule. If you - want to quit upon first error, you can turn off the automatic error - handling mechanism using rulecatch action, but you still need to - override methods mismatch and recoverFromMismatchSet. - - In general, the recognition exceptions can track where in a grammar a - problem occurred and/or what was the expected input. While the parser - knows its state (such as current input symbol and line info) that - state can change before the exception is reported so current token index - is computed and stored at exception time. From this info, you can - perhaps print an entire line of input not just a single token, for example. - Better to just say the recognizer had a problem and then let the parser - figure out a fancy report. - - """ - - def __init__(self, input=None): - Exception.__init__(self) - - # What input stream did the error occur in? - self.input = None - - # What is index of token/char were we looking at when the error - # occurred? - self.index = None - - # The current Token when an error occurred. Since not all streams - # can retrieve the ith Token, we have to track the Token object. - # For parsers. Even when it's a tree parser, token might be set. - self.token = None - - # If this is a tree parser exception, node is set to the node with - # the problem. - self.node = None - - # The current char when an error occurred. For lexers. - self.c = None - - # Track the line at which the error occurred in case this is - # generated from a lexer. We need to track this since the - # unexpected char doesn't carry the line info. - self.line = None - - self.charPositionInLine = None - - # If you are parsing a tree node stream, you will encounter som - # imaginary nodes w/o line/col info. We now search backwards looking - # for most recent token with line/col info, but notify getErrorHeader() - # that info is approximate. 
- self.approximateLineInfo = False - - - if input is not None: - self.input = input - self.index = input.index() - - # late import to avoid cyclic dependencies - from antlr3.streams import TokenStream, CharStream - from antlr3.tree import TreeNodeStream - - if isinstance(self.input, TokenStream): - self.token = self.input.LT(1) - self.line = self.token.line - self.charPositionInLine = self.token.charPositionInLine - - if isinstance(self.input, TreeNodeStream): - self.extractInformationFromTreeNodeStream(self.input) - - else: - if isinstance(self.input, CharStream): - self.c = self.input.LT(1) - self.line = self.input.line - self.charPositionInLine = self.input.charPositionInLine - - else: - self.c = self.input.LA(1) - - def extractInformationFromTreeNodeStream(self, nodes): - from antlr3.tree import Tree, CommonTree - from antlr3.tokens import CommonToken - - self.node = nodes.LT(1) - adaptor = nodes.adaptor - payload = adaptor.getToken(self.node) - if payload is not None: - self.token = payload - if payload.line <= 0: - # imaginary node; no line/pos info; scan backwards - i = -1 - priorNode = nodes.LT(i) - while priorNode is not None: - priorPayload = adaptor.getToken(priorNode) - if priorPayload is not None and priorPayload.line > 0: - # we found the most recent real line / pos info - self.line = priorPayload.line - self.charPositionInLine = priorPayload.charPositionInLine - self.approximateLineInfo = True - break - - i -= 1 - priorNode = nodes.LT(i) - - else: # node created from real token - self.line = payload.line - self.charPositionInLine = payload.charPositionInLine - - elif isinstance(self.node, Tree): - self.line = self.node.line - self.charPositionInLine = self.node.charPositionInLine - if isinstance(self.node, CommonTree): - self.token = self.node.token - - else: - type = adaptor.getType(self.node) - text = adaptor.getText(self.node) - self.token = CommonToken(type=type, text=text) - - - def getUnexpectedType(self): - """Return the token type or char of the unexpected input element""" - - from antlr3.streams import TokenStream - from antlr3.tree import TreeNodeStream - - if isinstance(self.input, TokenStream): - return self.token.type - - elif isinstance(self.input, TreeNodeStream): - adaptor = self.input.treeAdaptor - return adaptor.getType(self.node) - - else: - return self.c - - unexpectedType = property(getUnexpectedType) - - -class MismatchedTokenException(RecognitionException): - """@brief A mismatched char or Token or tree node.""" - - def __init__(self, expecting, input): - RecognitionException.__init__(self, input) - self.expecting = expecting - - - def __str__(self): - #return "MismatchedTokenException("+self.expecting+")" - return "MismatchedTokenException(%r!=%r)" % ( - self.getUnexpectedType(), self.expecting - ) - __repr__ = __str__ - - -class UnwantedTokenException(MismatchedTokenException): - """An extra token while parsing a TokenStream""" - - def getUnexpectedToken(self): - return self.token - - - def __str__(self): - exp = ", expected %s" % self.expecting - if self.expecting == INVALID_TOKEN_TYPE: - exp = "" - - if self.token is None: - return "UnwantedTokenException(found=%s%s)" % (None, exp) - - return "UnwantedTokenException(found=%s%s)" % (self.token.text, exp) - __repr__ = __str__ - - -class MissingTokenException(MismatchedTokenException): - """ - We were expecting a token but it's not found. The current token - is actually what we wanted next. 
- """ - - def __init__(self, expecting, input, inserted): - MismatchedTokenException.__init__(self, expecting, input) - - self.inserted = inserted - - - def getMissingType(self): - return self.expecting - - - def __str__(self): - if self.inserted is not None and self.token is not None: - return "MissingTokenException(inserted %r at %r)" % ( - self.inserted, self.token.text) - - if self.token is not None: - return "MissingTokenException(at %r)" % self.token.text - - return "MissingTokenException" - __repr__ = __str__ - - -class MismatchedRangeException(RecognitionException): - """@brief The next token does not match a range of expected types.""" - - def __init__(self, a, b, input): - RecognitionException.__init__(self, input) - - self.a = a - self.b = b - - - def __str__(self): - return "MismatchedRangeException(%r not in [%r..%r])" % ( - self.getUnexpectedType(), self.a, self.b - ) - __repr__ = __str__ - - -class MismatchedSetException(RecognitionException): - """@brief The next token does not match a set of expected types.""" - - def __init__(self, expecting, input): - RecognitionException.__init__(self, input) - - self.expecting = expecting - - - def __str__(self): - return "MismatchedSetException(%r not in %r)" % ( - self.getUnexpectedType(), self.expecting - ) - __repr__ = __str__ - - -class MismatchedNotSetException(MismatchedSetException): - """@brief Used for remote debugger deserialization""" - - def __str__(self): - return "MismatchedNotSetException(%r!=%r)" % ( - self.getUnexpectedType(), self.expecting - ) - __repr__ = __str__ - - -class NoViableAltException(RecognitionException): - """@brief Unable to decide which alternative to choose.""" - - def __init__( - self, grammarDecisionDescription, decisionNumber, stateNumber, input - ): - RecognitionException.__init__(self, input) - - self.grammarDecisionDescription = grammarDecisionDescription - self.decisionNumber = decisionNumber - self.stateNumber = stateNumber - - - def __str__(self): - return "NoViableAltException(%r!=[%r])" % ( - self.unexpectedType, self.grammarDecisionDescription - ) - __repr__ = __str__ - - -class EarlyExitException(RecognitionException): - """@brief The recognizer did not match anything for a (..)+ loop.""" - - def __init__(self, decisionNumber, input): - RecognitionException.__init__(self, input) - - self.decisionNumber = decisionNumber - - -class FailedPredicateException(RecognitionException): - """@brief A semantic predicate failed during validation. - - Validation of predicates - occurs when normally parsing the alternative just like matching a token. - Disambiguating predicate evaluation occurs when we hoist a predicate into - a prediction decision. 
- """ - - def __init__(self, input, ruleName, predicateText): - RecognitionException.__init__(self, input) - - self.ruleName = ruleName - self.predicateText = predicateText - - - def __str__(self): - return "FailedPredicateException("+self.ruleName+",{"+self.predicateText+"}?)" - __repr__ = __str__ - - -class MismatchedTreeNodeException(RecognitionException): - """@brief The next tree mode does not match the expected type.""" - - def __init__(self, expecting, input): - RecognitionException.__init__(self, input) - - self.expecting = expecting - - def __str__(self): - return "MismatchedTreeNodeException(%r!=%r)" % ( - self.getUnexpectedType(), self.expecting - ) - __repr__ = __str__ diff --git a/thirdparty/antlr3/extras.py b/thirdparty/antlr3/extras.py deleted file mode 100644 index 9155cda9e..000000000 --- a/thirdparty/antlr3/extras.py +++ /dev/null @@ -1,47 +0,0 @@ -""" @package antlr3.dottreegenerator -@brief ANTLR3 runtime package, tree module - -This module contains all support classes for AST construction and tree parsers. - -""" - -# begin[licence] -# -# [The "BSD licence"] -# Copyright (c) 2005-2008 Terence Parr -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# end[licence] - -# lot's of docstrings are missing, don't complain for now... -# pylint: disable-msg=C0111 - -from treewizard import TreeWizard - -try: - from antlr3.dottreegen import toDOT -except ImportError, exc: - def toDOT(*args, **kwargs): - raise exc diff --git a/thirdparty/antlr3/main.py b/thirdparty/antlr3/main.py deleted file mode 100644 index ae3906fcd..000000000 --- a/thirdparty/antlr3/main.py +++ /dev/null @@ -1,305 +0,0 @@ -"""ANTLR3 runtime package""" - -# begin[licence] -# -# [The "BSD licence"] -# Copyright (c) 2005-2008 Terence Parr -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# end[licence] - - -import sys -import optparse - -import antlr3 - - -class _Main(object): - def __init__(self): - self.stdin = sys.stdin - self.stdout = sys.stdout - self.stderr = sys.stderr - - - def parseOptions(self, argv): - optParser = optparse.OptionParser() - optParser.add_option( - "--encoding", - action="store", - type="string", - dest="encoding" - ) - optParser.add_option( - "--input", - action="store", - type="string", - dest="input" - ) - optParser.add_option( - "--interactive", "-i", - action="store_true", - dest="interactive" - ) - optParser.add_option( - "--no-output", - action="store_true", - dest="no_output" - ) - optParser.add_option( - "--profile", - action="store_true", - dest="profile" - ) - optParser.add_option( - "--hotshot", - action="store_true", - dest="hotshot" - ) - optParser.add_option( - "--port", - type="int", - dest="port", - default=None - ) - optParser.add_option( - "--debug-socket", - action='store_true', - dest="debug_socket", - default=None - ) - - self.setupOptions(optParser) - - return optParser.parse_args(argv[1:]) - - - def setupOptions(self, optParser): - pass - - - def execute(self, argv): - options, args = self.parseOptions(argv) - - self.setUp(options) - - if options.interactive: - while True: - try: - input = raw_input(">>> ") - except (EOFError, KeyboardInterrupt): - self.stdout.write("\nBye.\n") - break - - inStream = antlr3.ANTLRStringStream(input) - self.parseStream(options, inStream) - - else: - if options.input is not None: - inStream = antlr3.ANTLRStringStream(options.input) - - elif len(args) == 1 and args[0] != '-': - inStream = antlr3.ANTLRFileStream( - args[0], encoding=options.encoding - ) - - else: - inStream = antlr3.ANTLRInputStream( - self.stdin, encoding=options.encoding - ) - - if options.profile: - try: - import cProfile as profile - except ImportError: - import profile - - profile.runctx( - 'self.parseStream(options, inStream)', - globals(), - locals(), - 'profile.dat' - ) - - import pstats - stats = pstats.Stats('profile.dat') - stats.strip_dirs() - stats.sort_stats('time') - stats.print_stats(100) - - elif options.hotshot: - import hotshot - - profiler = hotshot.Profile('hotshot.dat') - profiler.runctx( - 'self.parseStream(options, inStream)', - globals(), - locals() - ) - - else: - self.parseStream(options, inStream) - - - def setUp(self, options): - pass - - - def parseStream(self, options, inStream): - raise NotImplementedError - - - def 
write(self, options, text): - if not options.no_output: - self.stdout.write(text) - - - def writeln(self, options, text): - self.write(options, text + '\n') - - -class LexerMain(_Main): - def __init__(self, lexerClass): - _Main.__init__(self) - - self.lexerClass = lexerClass - - - def parseStream(self, options, inStream): - lexer = self.lexerClass(inStream) - for token in lexer: - self.writeln(options, str(token)) - - -class ParserMain(_Main): - def __init__(self, lexerClassName, parserClass): - _Main.__init__(self) - - self.lexerClassName = lexerClassName - self.lexerClass = None - self.parserClass = parserClass - - - def setupOptions(self, optParser): - optParser.add_option( - "--lexer", - action="store", - type="string", - dest="lexerClass", - default=self.lexerClassName - ) - optParser.add_option( - "--rule", - action="store", - type="string", - dest="parserRule" - ) - - - def setUp(self, options): - lexerMod = __import__(options.lexerClass) - self.lexerClass = getattr(lexerMod, options.lexerClass) - - - def parseStream(self, options, inStream): - kwargs = {} - if options.port is not None: - kwargs['port'] = options.port - if options.debug_socket is not None: - kwargs['debug_socket'] = sys.stderr - - lexer = self.lexerClass(inStream) - tokenStream = antlr3.CommonTokenStream(lexer) - parser = self.parserClass(tokenStream, **kwargs) - result = getattr(parser, options.parserRule)() - if result is not None: - if hasattr(result, 'tree') and result.tree is not None: - self.writeln(options, result.tree.toStringTree()) - else: - self.writeln(options, repr(result)) - - -class WalkerMain(_Main): - def __init__(self, walkerClass): - _Main.__init__(self) - - self.lexerClass = None - self.parserClass = None - self.walkerClass = walkerClass - - - def setupOptions(self, optParser): - optParser.add_option( - "--lexer", - action="store", - type="string", - dest="lexerClass", - default=None - ) - optParser.add_option( - "--parser", - action="store", - type="string", - dest="parserClass", - default=None - ) - optParser.add_option( - "--parser-rule", - action="store", - type="string", - dest="parserRule", - default=None - ) - optParser.add_option( - "--rule", - action="store", - type="string", - dest="walkerRule" - ) - - - def setUp(self, options): - lexerMod = __import__(options.lexerClass) - self.lexerClass = getattr(lexerMod, options.lexerClass) - parserMod = __import__(options.parserClass) - self.parserClass = getattr(parserMod, options.parserClass) - - - def parseStream(self, options, inStream): - lexer = self.lexerClass(inStream) - tokenStream = antlr3.CommonTokenStream(lexer) - parser = self.parserClass(tokenStream) - result = getattr(parser, options.parserRule)() - if result is not None: - assert hasattr(result, 'tree'), "Parser did not return an AST" - nodeStream = antlr3.tree.CommonTreeNodeStream(result.tree) - nodeStream.setTokenStream(tokenStream) - walker = self.walkerClass(nodeStream) - result = getattr(walker, options.walkerRule)() - if result is not None: - if hasattr(result, 'tree'): - self.writeln(options, result.tree.toStringTree()) - else: - self.writeln(options, repr(result)) diff --git a/thirdparty/antlr3/recognizers.py b/thirdparty/antlr3/recognizers.py deleted file mode 100644 index d48280a58..000000000 --- a/thirdparty/antlr3/recognizers.py +++ /dev/null @@ -1,1485 +0,0 @@ -"""ANTLR3 runtime package""" - -# begin[licence] -# -# [The "BSD licence"] -# Copyright (c) 2005-2008 Terence Parr -# All rights reserved. 
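To illustrate the driver classes above: a generated parser module can use ParserMain as a quick command-line test harness, roughly as follows. TLexer, TParser and the rule name e are placeholders; note that setUp() imports the lexer class from a module of the same name, which is how ANTLR lays out generated files:

    import sys
    from antlr3.main import ParserMain

    # typically placed at the bottom of the generated TParser.py module
    if __name__ == '__main__':
        main = ParserMain("TLexer", TParser)
        main.execute(sys.argv + ["--rule", "e"])   # parse stdin with rule 'e'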
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# end[licence] - -import sys -import inspect - -from antlr3 import compatible_api_versions -from antlr3.constants import DEFAULT_CHANNEL, HIDDEN_CHANNEL, EOF, \ - EOR_TOKEN_TYPE, INVALID_TOKEN_TYPE -from antlr3.exceptions import RecognitionException, MismatchedTokenException, \ - MismatchedRangeException, MismatchedTreeNodeException, \ - NoViableAltException, EarlyExitException, MismatchedSetException, \ - MismatchedNotSetException, FailedPredicateException, \ - BacktrackingFailed, UnwantedTokenException, MissingTokenException -from antlr3.tokens import CommonToken, SKIP_TOKEN -from antlr3.compat import set, frozenset, reversed - - -class RecognizerSharedState(object): - """ - The set of fields needed by an abstract recognizer to recognize input - and recover from errors etc... As a separate state object, it can be - shared among multiple grammars; e.g., when one grammar imports another. - - These fields are publicly visible but the actual state pointer per - parser is protected. - """ - - def __init__(self): - # Track the set of token types that can follow any rule invocation. - # Stack grows upwards. - self.following = [] - - # This is true when we see an error and before having successfully - # matched a token. Prevents generation of more than one error message - # per error. - self.errorRecovery = False - - # The index into the input stream where the last error occurred. - # This is used to prevent infinite loops where an error is found - # but no token is consumed during recovery...another error is found, - # ad nauseam. This is a failsafe mechanism to guarantee that at least - # one token/tree node is consumed for two errors. - self.lastErrorIndex = -1 - - # If 0, no backtracking is going on. Safe to exec actions etc... - # If >0 then it's the level of backtracking. - self.backtracking = 0 - - # An array[size num rules] of Map<ruleStartIndex, stopTokenIndex> that tracks - # the stop token index for each rule. ruleMemo[ruleIndex] is - # the memoization table for ruleIndex. For key ruleStartIndex, you - # get back the stop token for associated rule or MEMO_RULE_FAILED. - # - # This is only used if rule memoization is on (which it is by default).
- self.ruleMemo = None - - ## Did the recognizer encounter a syntax error? Track how many. - self.syntaxErrors = 0 - - - # LEXER FIELDS (must be in same state object to avoid casting - # constantly in generated code and Lexer object) :( - - - ## The goal of all lexer rules/methods is to create a token object. - # This is an instance variable as multiple rules may collaborate to - # create a single token. nextToken will return this object after - # matching lexer rule(s). If you subclass to allow multiple token - # emissions, then set this to the last token to be matched or - # something nonnull so that the auto token emit mechanism will not - # emit another token. - self.token = None - - ## What character index in the stream did the current token start at? - # Needed, for example, to get the text for current token. Set at - # the start of nextToken. - self.tokenStartCharIndex = -1 - - ## The line on which the first character of the token resides - self.tokenStartLine = None - - ## The character position of first character within the line - self.tokenStartCharPositionInLine = None - - ## The channel number for the current token - self.channel = None - - ## The token type for the current token - self.type = None - - ## You can set the text for the current token to override what is in - # the input char buffer. Use setText() or can set this instance var. - self.text = None - - -class BaseRecognizer(object): - """ - @brief Common recognizer functionality. - - A generic recognizer that can handle recognizers generated from - lexer, parser, and tree grammars. This is all the parsing - support code essentially; most of it is error recovery stuff and - backtracking. - """ - - MEMO_RULE_FAILED = -2 - MEMO_RULE_UNKNOWN = -1 - - # copies from Token object for convenience in actions - DEFAULT_TOKEN_CHANNEL = DEFAULT_CHANNEL - - # for convenience in actions - HIDDEN = HIDDEN_CHANNEL - - # overridden by generated subclasses - tokenNames = None - - # The api_version attribute has been introduced in 3.3. If it is not - # overwritten in the generated recognizer, we assume a default of v0. - api_version = 0 - - def __init__(self, state=None): - # Input stream of the recognizer. Must be initialized by a subclass. - self.input = None - - ## State of a lexer, parser, or tree parser are collected into a state - # object so the state can be shared. This sharing is needed to - # have one grammar import others and share same error variables - # and other state variables. It's a kind of explicit multiple - # inheritance via delegation of methods and shared state. 
- if state is None: - state = RecognizerSharedState() - self._state = state - - if self.api_version not in compatible_api_versions: - raise RuntimeError( - ("ANTLR version mismatch: " - "The recognizer has been generated with API V%s, " - "but this runtime does not support this.") - % self.api_version) - - # this one only exists to shut up pylint :( - def setInput(self, input): - self.input = input - - - def reset(self): - """ - Reset the parser's state; subclasses must rewind the input stream. - """ - - # wack everything related to error recovery - if self._state is None: - # no shared state work to do - return - - self._state.following = [] - self._state.errorRecovery = False - self._state.lastErrorIndex = -1 - self._state.syntaxErrors = 0 - # wack everything related to backtracking and memoization - self._state.backtracking = 0 - if self._state.ruleMemo is not None: - self._state.ruleMemo = {} - - - def match(self, input, ttype, follow): - """ - Match current input symbol against ttype. Attempt - single token insertion or deletion error recovery. If - that fails, throw MismatchedTokenException. - - To turn off single token insertion or deletion error - recovery, override recoverFromMismatchedToken() and have it - throw an exception. See TreeParser.recoverFromMismatchedToken(). - This way any error in a rule will cause an exception and - immediate exit from rule. Rule would recover by resynchronizing - to the set of symbols that can follow rule ref. - """ - - matchedSymbol = self.getCurrentInputSymbol(input) - if self.input.LA(1) == ttype: - self.input.consume() - self._state.errorRecovery = False - return matchedSymbol - - if self._state.backtracking > 0: - # FIXME: need to return matchedSymbol here as well. damn!! - raise BacktrackingFailed - - matchedSymbol = self.recoverFromMismatchedToken(input, ttype, follow) - return matchedSymbol - - - def matchAny(self, input): - """Match the wildcard: in a symbol""" - - self._state.errorRecovery = False - self.input.consume() - - - def mismatchIsUnwantedToken(self, input, ttype): - return input.LA(2) == ttype - - - def mismatchIsMissingToken(self, input, follow): - if follow is None: - # we have no information about the follow; we can only consume - # a single token and hope for the best - return False - - # compute what can follow this grammar element reference - if EOR_TOKEN_TYPE in follow: - viableTokensFollowingThisRule = self.computeContextSensitiveRuleFOLLOW() - follow = follow | viableTokensFollowingThisRule - - if len(self._state.following) > 0: - # remove EOR if we're not the start symbol - follow = follow - set([EOR_TOKEN_TYPE]) - - # if current token is consistent with what could come after set - # then we know we're missing a token; error recovery is free to - # "insert" the missing token - if input.LA(1) in follow or EOR_TOKEN_TYPE in follow: - return True - - return False - - - def reportError(self, e): - """Report a recognition problem. - - This method sets errorRecovery to indicate the parser is recovering - not parsing. Once in recovery mode, no errors are generated. - To get out of recovery mode, the parser must successfully match - a token (after a resync). So it will go: - - 1. error occurs - 2. enter recovery mode, report error - 3. consume until token found in resynch set - 4. try to resume parsing - 5. next match() will reset errorRecovery mode - - If you override, make sure to update syntaxErrors if you care about - that.
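A small sketch of the override points described above: collecting diagnostics instead of writing to stderr, using only the hooks defined in this class (TParser is a hypothetical generated parser):

    class CollectingParser(TParser):          # TParser is hypothetical
        def __init__(self, *args, **kwargs):
            TParser.__init__(self, *args, **kwargs)
            self.error_messages = []

        def displayRecognitionError(self, tokenNames, e):
            # same formatting as the default, but kept for later inspection
            hdr = self.getErrorHeader(e)
            msg = self.getErrorMessage(e, tokenNames)
            self.error_messages.append(hdr + " " + msg)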
- - """ - - # if we've already reported an error and have not matched a token - # yet successfully, don't report any errors. - if self._state.errorRecovery: - return - - self._state.syntaxErrors += 1 # don't count spurious - self._state.errorRecovery = True - - self.displayRecognitionError(self.tokenNames, e) - - - def displayRecognitionError(self, tokenNames, e): - hdr = self.getErrorHeader(e) - msg = self.getErrorMessage(e, tokenNames) - self.emitErrorMessage(hdr+" "+msg) - - - def getErrorMessage(self, e, tokenNames): - """ - What error message should be generated for the various - exception types? - - Not very object-oriented code, but I like having all error message - generation within one method rather than spread among all of the - exception classes. This also makes it much easier for the exception - handling because the exception classes do not have to have pointers back - to this object to access utility routines and so on. Also, changing - the message for an exception type would be difficult because you - would have to subclassing exception, but then somehow get ANTLR - to make those kinds of exception objects instead of the default. - This looks weird, but trust me--it makes the most sense in terms - of flexibility. - - For grammar debugging, you will want to override this to add - more information such as the stack frame with - getRuleInvocationStack(e, this.getClass().getName()) and, - for no viable alts, the decision description and state etc... - - Override this to change the message generated for one or more - exception types. - """ - - if isinstance(e, UnwantedTokenException): - tokenName = "" - if e.expecting == EOF: - tokenName = "EOF" - - else: - tokenName = self.tokenNames[e.expecting] - - msg = "extraneous input %s expecting %s" % ( - self.getTokenErrorDisplay(e.getUnexpectedToken()), - tokenName - ) - - elif isinstance(e, MissingTokenException): - tokenName = "" - if e.expecting == EOF: - tokenName = "EOF" - - else: - tokenName = self.tokenNames[e.expecting] - - msg = "missing %s at %s" % ( - tokenName, self.getTokenErrorDisplay(e.token) - ) - - elif isinstance(e, MismatchedTokenException): - tokenName = "" - if e.expecting == EOF: - tokenName = "EOF" - else: - tokenName = self.tokenNames[e.expecting] - - msg = "mismatched input " \ - + self.getTokenErrorDisplay(e.token) \ - + " expecting " \ - + tokenName - - elif isinstance(e, MismatchedTreeNodeException): - tokenName = "" - if e.expecting == EOF: - tokenName = "EOF" - else: - tokenName = self.tokenNames[e.expecting] - - msg = "mismatched tree node: %s expecting %s" \ - % (e.node, tokenName) - - elif isinstance(e, NoViableAltException): - msg = "no viable alternative at input " \ - + self.getTokenErrorDisplay(e.token) - - elif isinstance(e, EarlyExitException): - msg = "required (...)+ loop did not match anything at input " \ - + self.getTokenErrorDisplay(e.token) - - elif isinstance(e, MismatchedSetException): - msg = "mismatched input " \ - + self.getTokenErrorDisplay(e.token) \ - + " expecting set " \ - + repr(e.expecting) - - elif isinstance(e, MismatchedNotSetException): - msg = "mismatched input " \ - + self.getTokenErrorDisplay(e.token) \ - + " expecting set " \ - + repr(e.expecting) - - elif isinstance(e, FailedPredicateException): - msg = "rule " \ - + e.ruleName \ - + " failed predicate: {" \ - + e.predicateText \ - + "}?" - - else: - msg = str(e) - - return msg - - - def getNumberOfSyntaxErrors(self): - """ - Get number of recognition errors (lexer, parser, tree parser). 
Each - recognizer tracks its own number. So parser and lexer each have - separate count. Does not count the spurious errors found between - an error and next valid token match - - See also reportError() - """ - return self._state.syntaxErrors - - - def getErrorHeader(self, e): - """ - What is the error header, normally line/character position information? - """ - - source_name = self.getSourceName() - if source_name is not None: - return "%s line %d:%d" % (source_name, e.line, e.charPositionInLine) - return "line %d:%d" % (e.line, e.charPositionInLine) - - - def getTokenErrorDisplay(self, t): - """ - How should a token be displayed in an error message? The default - is to display just the text, but during development you might - want to have a lot of information spit out. Override in that case - to use t.toString() (which, for CommonToken, dumps everything about - the token). This is better than forcing you to override a method in - your token objects because you don't have to go modify your lexer - so that it creates a new Java type. - """ - - s = t.text - if s is None: - if t.type == EOF: - s = "" - else: - s = "<"+t.type+">" - - return repr(s) - - - def emitErrorMessage(self, msg): - """Override this method to change where error messages go""" - sys.stderr.write(msg + '\n') - - - def recover(self, input, re): - """ - Recover from an error found on the input stream. This is - for NoViableAlt and mismatched symbol exceptions. If you enable - single token insertion and deletion, this will usually not - handle mismatched symbol exceptions but there could be a mismatched - token that the match() routine could not recover from. - """ - - # PROBLEM? what if input stream is not the same as last time - # perhaps make lastErrorIndex a member of input - if self._state.lastErrorIndex == input.index(): - # uh oh, another error at same token index; must be a case - # where LT(1) is in the recovery token set so nothing is - # consumed; consume a single token so at least to prevent - # an infinite loop; this is a failsafe. - input.consume() - - self._state.lastErrorIndex = input.index() - followSet = self.computeErrorRecoverySet() - - self.beginResync() - self.consumeUntil(input, followSet) - self.endResync() - - - def beginResync(self): - """ - A hook to listen in on the token consumption during error recovery. - The DebugParser subclasses this to fire events to the listenter. - """ - - pass - - - def endResync(self): - """ - A hook to listen in on the token consumption during error recovery. - The DebugParser subclasses this to fire events to the listenter. - """ - - pass - - - def computeErrorRecoverySet(self): - """ - Compute the error recovery set for the current rule. During - rule invocation, the parser pushes the set of tokens that can - follow that rule reference on the stack; this amounts to - computing FIRST of what follows the rule reference in the - enclosing rule. This local follow set only includes tokens - from within the rule; i.e., the FIRST computation done by - ANTLR stops at the end of a rule. - - EXAMPLE - - When you find a "no viable alt exception", the input is not - consistent with any of the alternatives for rule r. The best - thing to do is to consume tokens until you see something that - can legally follow a call to r *or* any rule that called r. - You don't want the exact set of viable next tokens because the - input might just be missing a token--you might consume the - rest of the input looking for one of the missing tokens. 
- - Consider grammar: - - a : '[' b ']' - | '(' b ')' - ; - b : c '^' INT ; - c : ID - | INT - ; - - At each rule invocation, the set of tokens that could follow - that rule is pushed on a stack. Here are the various "local" - follow sets: - - FOLLOW(b1_in_a) = FIRST(']') = ']' - FOLLOW(b2_in_a) = FIRST(')') = ')' - FOLLOW(c_in_b) = FIRST('^') = '^' - - Upon erroneous input "[]", the call chain is - - a -> b -> c - - and, hence, the follow context stack is: - - depth local follow set after call to rule - 0 \ a (from main()) - 1 ']' b - 3 '^' c - - Notice that ')' is not included, because b would have to have - been called from a different context in rule a for ')' to be - included. - - For error recovery, we cannot consider FOLLOW(c) - (context-sensitive or otherwise). We need the combined set of - all context-sensitive FOLLOW sets--the set of all tokens that - could follow any reference in the call chain. We need to - resync to one of those tokens. Note that FOLLOW(c)='^' and if - we resync'd to that token, we'd consume until EOF. We need to - sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. - In this case, for input "[]", LA(1) is in this set so we would - not consume anything and after printing an error rule c would - return normally. It would not find the required '^' though. - At this point, it gets a mismatched token error and throws an - exception (since LA(1) is not in the viable following token - set). The rule exception handler tries to recover, but finds - the same recovery set and doesn't consume anything. Rule b - exits normally returning to rule a. Now it finds the ']' (and - with the successful match exits errorRecovery mode). - - So, you cna see that the parser walks up call chain looking - for the token that was a member of the recovery set. - - Errors are not generated in errorRecovery mode. - - ANTLR's error recovery mechanism is based upon original ideas: - - "Algorithms + Data Structures = Programs" by Niklaus Wirth - - and - - "A note on error recovery in recursive descent parsers": - http://portal.acm.org/citation.cfm?id=947902.947905 - - Later, Josef Grosch had some good ideas: - - "Efficient and Comfortable Error Recovery in Recursive Descent - Parsers": - ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip - - Like Grosch I implemented local FOLLOW sets that are combined - at run-time upon error to avoid overhead during parsing. - """ - - return self.combineFollows(False) - - - def computeContextSensitiveRuleFOLLOW(self): - """ - Compute the context-sensitive FOLLOW set for current rule. - This is set of token types that can follow a specific rule - reference given a specific call chain. You get the set of - viable tokens that can possibly come next (lookahead depth 1) - given the current call chain. Contrast this with the - definition of plain FOLLOW for rule r: - - FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} - - where x in T* and alpha, beta in V*; T is set of terminals and - V is the set of terminals and nonterminals. In other words, - FOLLOW(r) is the set of all tokens that can possibly follow - references to r in *any* sentential form (context). At - runtime, however, we know precisely which context applies as - we have the call chain. We may compute the exact (rather - than covering superset) set of following tokens. - - For example, consider grammar: - - stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} - | "return" expr '.' 
- ; - expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} - atom : INT // FOLLOW(atom)=={'+',')',';','.'} - | '(' expr ')' - ; - - The FOLLOW sets are all inclusive whereas context-sensitive - FOLLOW sets are precisely what could follow a rule reference. - For input input "i=(3);", here is the derivation: - - stat => ID '=' expr ';' - => ID '=' atom ('+' atom)* ';' - => ID '=' '(' expr ')' ('+' atom)* ';' - => ID '=' '(' atom ')' ('+' atom)* ';' - => ID '=' '(' INT ')' ('+' atom)* ';' - => ID '=' '(' INT ')' ';' - - At the "3" token, you'd have a call chain of - - stat -> expr -> atom -> expr -> atom - - What can follow that specific nested ref to atom? Exactly ')' - as you can see by looking at the derivation of this specific - input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. - - You want the exact viable token set when recovering from a - token mismatch. Upon token mismatch, if LA(1) is member of - the viable next token set, then you know there is most likely - a missing token in the input stream. "Insert" one by just not - throwing an exception. - """ - - return self.combineFollows(True) - - - def combineFollows(self, exact): - followSet = set() - for idx, localFollowSet in reversed(list(enumerate(self._state.following))): - followSet |= localFollowSet - if exact: - # can we see end of rule? - if EOR_TOKEN_TYPE in localFollowSet: - # Only leave EOR in set if at top (start rule); this lets - # us know if have to include follow(start rule); i.e., EOF - if idx > 0: - followSet.remove(EOR_TOKEN_TYPE) - - else: - # can't see end of rule, quit - break - - return followSet - - - def recoverFromMismatchedToken(self, input, ttype, follow): - """Attempt to recover from a single missing or extra token. - - EXTRA TOKEN - - LA(1) is not what we are looking for. If LA(2) has the right token, - however, then assume LA(1) is some extra spurious token. Delete it - and LA(2) as if we were doing a normal match(), which advances the - input. - - MISSING TOKEN - - If current token is consistent with what could come after - ttype then it is ok to 'insert' the missing token, else throw - exception For example, Input 'i=(3;' is clearly missing the - ')'. When the parser returns from the nested call to expr, it - will have call chain: - - stat -> expr -> atom - - and it will be trying to match the ')' at this point in the - derivation: - - => ID '=' '(' INT ')' ('+' atom)* ';' - ^ - match() will see that ';' doesn't match ')' and report a - mismatched token error. To recover, it sees that LA(1)==';' - is in the set of tokens that can follow the ')' token - reference in rule atom. It can assume that you forgot the ')'. 
- """ - - e = None - - # if next token is what we are looking for then "delete" this token - if self.mismatchIsUnwantedToken(input, ttype): - e = UnwantedTokenException(ttype, input) - - self.beginResync() - input.consume() # simply delete extra token - self.endResync() - - # report after consuming so AW sees the token in the exception - self.reportError(e) - - # we want to return the token we're actually matching - matchedSymbol = self.getCurrentInputSymbol(input) - - # move past ttype token as if all were ok - input.consume() - return matchedSymbol - - # can't recover with single token deletion, try insertion - if self.mismatchIsMissingToken(input, follow): - inserted = self.getMissingSymbol(input, e, ttype, follow) - e = MissingTokenException(ttype, input, inserted) - - # report after inserting so AW sees the token in the exception - self.reportError(e) - return inserted - - # even that didn't work; must throw the exception - e = MismatchedTokenException(ttype, input) - raise e - - - def recoverFromMismatchedSet(self, input, e, follow): - """Not currently used""" - - if self.mismatchIsMissingToken(input, follow): - self.reportError(e) - # we don't know how to conjure up a token for sets yet - return self.getMissingSymbol(input, e, INVALID_TOKEN_TYPE, follow) - - # TODO do single token deletion like above for Token mismatch - raise e - - - def getCurrentInputSymbol(self, input): - """ - Match needs to return the current input symbol, which gets put - into the label for the associated token ref; e.g., x=ID. Token - and tree parsers need to return different objects. Rather than test - for input stream type or change the IntStream interface, I use - a simple method to ask the recognizer to tell me what the current - input symbol is. - - This is ignored for lexers. - """ - - return None - - - def getMissingSymbol(self, input, e, expectedTokenType, follow): - """Conjure up a missing token during error recovery. - - The recognizer attempts to recover from single missing - symbols. But, actions might refer to that missing symbol. - For example, x=ID {f($x);}. The action clearly assumes - that there has been an identifier matched previously and that - $x points at that token. If that token is missing, but - the next token in the stream is what we want we assume that - this token is missing and we keep going. Because we - have to return some token to replace the missing token, - we have to conjure one up. This method gives the user control - over the tokens returned for missing tokens. Mostly, - you will want to create something special for identifier - tokens. For literals such as '{' and ',', the default - action in the parser or tree parser works. It simply creates - a CommonToken of the appropriate type. The text will be the token. - If you change what tokens must be created by the lexer, - override this method to create the appropriate tokens. - """ - - return None - - -## def recoverFromMissingElement(self, input, e, follow): -## """ -## This code is factored out from mismatched token and mismatched set -## recovery. It handles "single token insertion" error recovery for -## both. No tokens are consumed to recover from insertions. Return -## true if recovery was possible else return false. 
-## """ - -## if self.mismatchIsMissingToken(input, follow): -## self.reportError(e) -## return True - -## # nothing to do; throw exception -## return False - - - def consumeUntil(self, input, tokenTypes): - """ - Consume tokens until one matches the given token or token set - - tokenTypes can be a single token type or a set of token types - - """ - - if not isinstance(tokenTypes, (set, frozenset)): - tokenTypes = frozenset([tokenTypes]) - - ttype = input.LA(1) - while ttype != EOF and ttype not in tokenTypes: - input.consume() - ttype = input.LA(1) - - - def getRuleInvocationStack(self): - """ - Return List of the rules in your parser instance - leading up to a call to this method. You could override if - you want more details such as the file/line info of where - in the parser java code a rule is invoked. - - This is very useful for error messages and for context-sensitive - error recovery. - - You must be careful, if you subclass a generated recognizers. - The default implementation will only search the module of self - for rules, but the subclass will not contain any rules. - You probably want to override this method to look like - - def getRuleInvocationStack(self): - return self._getRuleInvocationStack(.__module__) - - where is the class of the generated recognizer, e.g. - the superclass of self. - """ - - return self._getRuleInvocationStack(self.__module__) - - - def _getRuleInvocationStack(cls, module): - """ - A more general version of getRuleInvocationStack where you can - pass in, for example, a RecognitionException to get it's rule - stack trace. This routine is shared with all recognizers, hence, - static. - - TODO: move to a utility class or something; weird having lexer call - this - """ - - # mmmhhh,... perhaps look at the first argument - # (f_locals[co_varnames[0]]?) and test if it's a (sub)class of - # requested recognizer... - - rules = [] - for frame in reversed(inspect.stack()): - code = frame[0].f_code - codeMod = inspect.getmodule(code) - if codeMod is None: - continue - - # skip frames not in requested module - if codeMod.__name__ != module: - continue - - # skip some unwanted names - if code.co_name in ('nextToken', ''): - continue - - rules.append(code.co_name) - - return rules - - _getRuleInvocationStack = classmethod(_getRuleInvocationStack) - - - def getBacktrackingLevel(self): - return self._state.backtracking - - def setBacktrackingLevel(self, n): - self._state.backtracking = n - - - def getGrammarFileName(self): - """For debugging and other purposes, might want the grammar name. - - Have ANTLR generate an implementation for this method. - """ - - return self.grammarFileName - - - def getSourceName(self): - raise NotImplementedError - - - def toStrings(self, tokens): - """A convenience method for use most often with template rewrites. - - Convert a List to List - """ - - if tokens is None: - return None - - return [token.text for token in tokens] - - - def getRuleMemoization(self, ruleIndex, ruleStartIndex): - """ - Given a rule number and a start token index number, return - MEMO_RULE_UNKNOWN if the rule has not parsed input starting from - start index. If this rule has parsed input starting from the - start index before, then return where the rule stopped parsing. - It returns the index of the last token matched by the rule. 
- """ - - if ruleIndex not in self._state.ruleMemo: - self._state.ruleMemo[ruleIndex] = {} - - return self._state.ruleMemo[ruleIndex].get( - ruleStartIndex, self.MEMO_RULE_UNKNOWN - ) - - - def alreadyParsedRule(self, input, ruleIndex): - """ - Has this rule already parsed input at the current index in the - input stream? Return the stop token index or MEMO_RULE_UNKNOWN. - If we attempted but failed to parse properly before, return - MEMO_RULE_FAILED. - - This method has a side-effect: if we have seen this input for - this rule and successfully parsed before, then seek ahead to - 1 past the stop token matched for this rule last time. - """ - - stopIndex = self.getRuleMemoization(ruleIndex, input.index()) - if stopIndex == self.MEMO_RULE_UNKNOWN: - return False - - if stopIndex == self.MEMO_RULE_FAILED: - raise BacktrackingFailed - - else: - input.seek(stopIndex + 1) - - return True - - - def memoize(self, input, ruleIndex, ruleStartIndex, success): - """ - Record whether or not this rule parsed the input at this position - successfully. - """ - - if success: - stopTokenIndex = input.index() - 1 - else: - stopTokenIndex = self.MEMO_RULE_FAILED - - if ruleIndex in self._state.ruleMemo: - self._state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex - - - def traceIn(self, ruleName, ruleIndex, inputSymbol): - sys.stdout.write("enter %s %s" % (ruleName, inputSymbol)) - - if self._state.backtracking > 0: - sys.stdout.write(" backtracking=%s" % self._state.backtracking) - - sys.stdout.write('\n') - - - def traceOut(self, ruleName, ruleIndex, inputSymbol): - sys.stdout.write("exit %s %s" % (ruleName, inputSymbol)) - - if self._state.backtracking > 0: - sys.stdout.write(" backtracking=%s" % self._state.backtracking) - - # mmmm... we use BacktrackingFailed exceptions now. So how could we - # get that information here? - #if self._state.failed: - # sys.stdout.write(" failed") - #else: - # sys.stdout.write(" succeeded") - - sys.stdout.write('\n') - - -class TokenSource(object): - """ - @brief Abstract baseclass for token producers. - - A source of tokens must provide a sequence of tokens via nextToken() - and also must reveal it's source of characters; CommonToken's text is - computed from a CharStream; it only store indices into the char stream. - - Errors from the lexer are never passed to the parser. Either you want - to keep going or you do not upon token recognition error. If you do not - want to continue lexing then you do not want to continue parsing. Just - throw an exception not under RecognitionException and Java will naturally - toss you all the way out of the recognizers. If you want to continue - lexing then you should not throw an exception to the parser--it has already - requested a token. Keep lexing until you get a valid one. Just report - errors and keep going, looking for a valid token. - """ - - def nextToken(self): - """Return a Token object from your input stream (usually a CharStream). - - Do not fail/return upon lexing error; keep chewing on the characters - until you get a good one; errors are not passed through to the parser. - """ - - raise NotImplementedError - - - def __iter__(self): - """The TokenSource is an interator. - - The iteration will not include the final EOF token, see also the note - for the next() method. - - """ - - return self - - - def next(self): - """Return next token or raise StopIteration. - - Note that this will raise StopIteration when hitting the EOF token, - so EOF will not be part of the iteration. 
- - """ - - token = self.nextToken() - if token is None or token.type == EOF: - raise StopIteration - return token - - -class Lexer(BaseRecognizer, TokenSource): - """ - @brief Baseclass for generated lexer classes. - - A lexer is recognizer that draws input symbols from a character stream. - lexer grammars result in a subclass of this object. A Lexer object - uses simplified match() and error recovery mechanisms in the interest - of speed. - """ - - def __init__(self, input, state=None): - BaseRecognizer.__init__(self, state) - TokenSource.__init__(self) - - # Where is the lexer drawing characters from? - self.input = input - - - def reset(self): - BaseRecognizer.reset(self) # reset all recognizer state variables - - if self.input is not None: - # rewind the input - self.input.seek(0) - - if self._state is None: - # no shared state work to do - return - - # wack Lexer state variables - self._state.token = None - self._state.type = INVALID_TOKEN_TYPE - self._state.channel = DEFAULT_CHANNEL - self._state.tokenStartCharIndex = -1 - self._state.tokenStartLine = -1 - self._state.tokenStartCharPositionInLine = -1 - self._state.text = None - - - def makeEOFToken(self): - eof = CommonToken( - type=EOF, channel=DEFAULT_CHANNEL, - input=self.input, - start=self.input.index(), stop=self.input.index()) - eof.line = self.input.line - eof.charPositionInLine = self.input.charPositionInLine - return eof - - def nextToken(self): - """ - Return a token from this source; i.e., match a token on the char - stream. - """ - - while 1: - self._state.token = None - self._state.channel = DEFAULT_CHANNEL - self._state.tokenStartCharIndex = self.input.index() - self._state.tokenStartCharPositionInLine = self.input.charPositionInLine - self._state.tokenStartLine = self.input.line - self._state.text = None - if self.input.LA(1) == EOF: - return self.makeEOFToken() - - try: - self.mTokens() - - if self._state.token is None: - self.emit() - - elif self._state.token == SKIP_TOKEN: - continue - - return self._state.token - - except NoViableAltException, re: - self.reportError(re) - self.recover(re) # throw out current char and try again - - except RecognitionException, re: - self.reportError(re) - # match() routine has already called recover() - - - def skip(self): - """ - Instruct the lexer to skip creating a token for current lexer rule - and look for another token. nextToken() knows to keep looking when - a lexer rule finishes with token set to SKIP_TOKEN. Recall that - if token==null at end of any token rule, it creates one for you - and emits it. - """ - - self._state.token = SKIP_TOKEN - - - def mTokens(self): - """This is the lexer entry point that sets instance var 'token'""" - - # abstract method - raise NotImplementedError - - - def setCharStream(self, input): - """Set the char stream and reset the lexer""" - self.input = None - self.reset() - self.input = input - - - def getSourceName(self): - return self.input.getSourceName() - - - def emit(self, token=None): - """ - The standard method called to automatically emit a token at the - outermost lexical rule. The token object should point into the - char buffer start..stop. If there is a text override in 'text', - use that to set the token's text. Override this method to emit - custom Token objects. - - If you are building trees, then you should also override - Parser or TreeParser.getMissingSymbol(). 
- """ - - if token is None: - token = CommonToken( - input=self.input, - type=self._state.type, - channel=self._state.channel, - start=self._state.tokenStartCharIndex, - stop=self.getCharIndex()-1 - ) - token.line = self._state.tokenStartLine - token.text = self._state.text - token.charPositionInLine = self._state.tokenStartCharPositionInLine - - self._state.token = token - - return token - - - def match(self, s): - if isinstance(s, basestring): - for c in s: - if self.input.LA(1) != ord(c): - if self._state.backtracking > 0: - raise BacktrackingFailed - - mte = MismatchedTokenException(c, self.input) - self.recover(mte) - raise mte - - self.input.consume() - - else: - if self.input.LA(1) != s: - if self._state.backtracking > 0: - raise BacktrackingFailed - - mte = MismatchedTokenException(unichr(s), self.input) - self.recover(mte) # don't really recover; just consume in lexer - raise mte - - self.input.consume() - - - def matchAny(self): - self.input.consume() - - - def matchRange(self, a, b): - if self.input.LA(1) < a or self.input.LA(1) > b: - if self._state.backtracking > 0: - raise BacktrackingFailed - - mre = MismatchedRangeException(unichr(a), unichr(b), self.input) - self.recover(mre) - raise mre - - self.input.consume() - - - def getLine(self): - return self.input.line - - - def getCharPositionInLine(self): - return self.input.charPositionInLine - - - def getCharIndex(self): - """What is the index of the current character of lookahead?""" - - return self.input.index() - - - def getText(self): - """ - Return the text matched so far for the current token or any - text override. - """ - if self._state.text is not None: - return self._state.text - - return self.input.substring( - self._state.tokenStartCharIndex, - self.getCharIndex()-1 - ) - - - def setText(self, text): - """ - Set the complete text of this token; it wipes any previous - changes to the text. - """ - self._state.text = text - - - text = property(getText, setText) - - - def reportError(self, e): - ## TODO: not thought about recovery in lexer yet. - - ## # if we've already reported an error and have not matched a token - ## # yet successfully, don't report any errors. - ## if self.errorRecovery: - ## #System.err.print("[SPURIOUS] "); - ## return; - ## - ## self.errorRecovery = True - - self.displayRecognitionError(self.tokenNames, e) - - - def getErrorMessage(self, e, tokenNames): - msg = None - - if isinstance(e, MismatchedTokenException): - msg = "mismatched character " \ - + self.getCharErrorDisplay(e.c) \ - + " expecting " \ - + self.getCharErrorDisplay(e.expecting) - - elif isinstance(e, NoViableAltException): - msg = "no viable alternative at character " \ - + self.getCharErrorDisplay(e.c) - - elif isinstance(e, EarlyExitException): - msg = "required (...)+ loop did not match anything at character " \ - + self.getCharErrorDisplay(e.c) - - elif isinstance(e, MismatchedNotSetException): - msg = "mismatched character " \ - + self.getCharErrorDisplay(e.c) \ - + " expecting set " \ - + repr(e.expecting) - - elif isinstance(e, MismatchedSetException): - msg = "mismatched character " \ - + self.getCharErrorDisplay(e.c) \ - + " expecting set " \ - + repr(e.expecting) - - elif isinstance(e, MismatchedRangeException): - msg = "mismatched character " \ - + self.getCharErrorDisplay(e.c) \ - + " expecting set " \ - + self.getCharErrorDisplay(e.a) \ - + ".." 
\ - + self.getCharErrorDisplay(e.b) - - else: - msg = BaseRecognizer.getErrorMessage(self, e, tokenNames) - - return msg - - - def getCharErrorDisplay(self, c): - if c == EOF: - c = '<EOF>' - return repr(c) - - - def recover(self, re): - """ - Lexers can normally match any char in their vocabulary after matching - a token, so do the easy thing and just kill a character and hope - it all works out. You can instead use the rule invocation stack - to do sophisticated error recovery if you are in a fragment rule. - """ - - self.input.consume() - - - def traceIn(self, ruleName, ruleIndex): - inputSymbol = "%s line=%d:%s" % (self.input.LT(1), - self.getLine(), - self.getCharPositionInLine() - ) - - BaseRecognizer.traceIn(self, ruleName, ruleIndex, inputSymbol) - - - def traceOut(self, ruleName, ruleIndex): - inputSymbol = "%s line=%d:%s" % (self.input.LT(1), - self.getLine(), - self.getCharPositionInLine() - ) - - BaseRecognizer.traceOut(self, ruleName, ruleIndex, inputSymbol) - - - -class Parser(BaseRecognizer): - """ - @brief Baseclass for generated parser classes. - """ - - def __init__(self, lexer, state=None): - BaseRecognizer.__init__(self, state) - - self.input = lexer - - - def reset(self): - BaseRecognizer.reset(self) # reset all recognizer state variables - if self.input is not None: - self.input.seek(0) # rewind the input - - - def getCurrentInputSymbol(self, input): - return input.LT(1) - - - def getMissingSymbol(self, input, e, expectedTokenType, follow): - if expectedTokenType == EOF: - tokenText = "<missing EOF>" - else: - tokenText = "<missing " + self.tokenNames[expectedTokenType] + ">" - t = CommonToken(type=expectedTokenType, text=tokenText) - current = input.LT(1) - if current.type == EOF: - current = input.LT(-1) - - if current is not None: - t.line = current.line - t.charPositionInLine = current.charPositionInLine - t.channel = DEFAULT_CHANNEL - return t - - - def setTokenStream(self, input): - """Set the token stream and reset the parser""" - - self.input = None - self.reset() - self.input = input - - - def getTokenStream(self): - return self.input - - - def getSourceName(self): - return self.input.getSourceName() - - - def traceIn(self, ruleName, ruleIndex): - BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1)) - - - def traceOut(self, ruleName, ruleIndex): - BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1)) - - -class RuleReturnScope(object): - """ - Rules can return start/stop info as well as possible trees and templates. - """ - - def getStart(self): - """Return the start token or tree.""" - return None - - - def getStop(self): - """Return the stop token or tree.""" - return None - - - def getTree(self): - """Has a value potentially if output=AST.""" - return None - - - def getTemplate(self): - """Has a value potentially if output=template.""" - return None - - -class ParserRuleReturnScope(RuleReturnScope): - """ - Rules that return more than a single value must return an object - containing all the values. Besides the properties defined in - RuleLabelScope.predefinedRulePropertiesScope there may be user-defined - return values. This class simply defines the minimum properties that - are always defined and methods to access the others that might be - available depending on output option such as template and tree. - - Note text is not an actual property of the return value, it is computed - from start and stop using the input stream's toString() method. I - could add a ctor to this so that we can pass in and store the input - stream, but I'm not sure we want to do that.
It would seem to be undefined - to get the .text property anyway if the rule matches tokens from multiple - input streams. - - I do not use getters for fields of objects that are used simply to - group values such as this aggregate. The getters/setters are there to - satisfy the superclass interface. - """ - - def __init__(self): - self.start = None - self.stop = None - self.tree = None # only used when output=AST - - - def getStart(self): - return self.start - - - def getStop(self): - return self.stop - - - def getTree(self): - return self.tree diff --git a/thirdparty/antlr3/streams.py b/thirdparty/antlr3/streams.py deleted file mode 100644 index 84016bd52..000000000 --- a/thirdparty/antlr3/streams.py +++ /dev/null @@ -1,1522 +0,0 @@ -"""ANTLR3 runtime package""" - -# begin[licence] -# -# [The "BSD licence"] -# Copyright (c) 2005-2008 Terence Parr -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# end[licence] - -import codecs -from StringIO import StringIO - -from antlr3.constants import DEFAULT_CHANNEL, EOF -from antlr3.tokens import Token, CommonToken - - -############################################################################ -# -# basic interfaces -# IntStream -# +- CharStream -# \- TokenStream -# -# subclasses must implemented all methods -# -############################################################################ - -class IntStream(object): - """ - @brief Base interface for streams of integer values. - - A simple stream of integers used when all I care about is the char - or token type sequence (such as interpretation). - """ - - def consume(self): - raise NotImplementedError - - - def LA(self, i): - """Get int at current input pointer + i ahead where i=1 is next int. - - Negative indexes are allowed. LA(-1) is previous token (token - just matched). LA(-i) where i is before first token should - yield -1, invalid char / EOF. - """ - - raise NotImplementedError - - - def mark(self): - """ - Tell the stream to start buffering if it hasn't already. Return - current input position, index(), or some other marker so that - when passed to rewind() you get back to the same spot. - rewind(mark()) should not affect the input cursor. 
The Lexer - track line/col info as well as input index so its markers are - not pure input indexes. Same for tree node streams. - """ - - raise NotImplementedError - - - def index(self): - """ - Return the current input symbol index 0..n where n indicates the - last symbol has been read. The index is the symbol about to be - read not the most recently read symbol. - """ - - raise NotImplementedError - - - def rewind(self, marker=None): - """ - Reset the stream so that next call to index would return marker. - The marker will usually be index() but it doesn't have to be. It's - just a marker to indicate what state the stream was in. This is - essentially calling release() and seek(). If there are markers - created after this marker argument, this routine must unroll them - like a stack. Assume the state the stream was in when this marker - was created. - - If marker is None: - Rewind to the input position of the last marker. - Used currently only after a cyclic DFA and just - before starting a sem/syn predicate to get the - input position back to the start of the decision. - Do not "pop" the marker off the state. mark(i) - and rewind(i) should balance still. It is - like invoking rewind(last marker) but it should not "pop" - the marker off. It's like seek(last marker's input position). - """ - - raise NotImplementedError - - - def release(self, marker=None): - """ - You may want to commit to a backtrack but don't want to force the - stream to keep bookkeeping objects around for a marker that is - no longer necessary. This will have the same behavior as - rewind() except it releases resources without the backward seek. - This must throw away resources for all markers back to the marker - argument. So if you're nested 5 levels of mark(), and then release(2) - you have to release resources for depths 2..5. - """ - - raise NotImplementedError - - - def seek(self, index): - """ - Set the input cursor to the position indicated by index. This is - normally used to seek ahead in the input stream. No buffering is - required to do this unless you know your stream will use seek to - move backwards such as when backtracking. - - This is different from rewind in its multi-directional - requirement and in that its argument is strictly an input cursor - (index). - - For char streams, seeking forward must update the stream state such - as line number. For seeking backwards, you will be presumably - backtracking using the mark/rewind mechanism that restores state and - so this method does not need to update state when seeking backwards. - - Currently, this method is only used for efficient backtracking using - memoization, but in the future it may be used for incremental parsing. - - The index is 0..n-1. A seek to position i means that LA(1) will - return the ith symbol. So, seeking to 0 means LA(1) will return the - first element in the stream. - """ - - raise NotImplementedError - - - def size(self): - """ - Only makes sense for streams that buffer everything up probably, but - might be useful to display the entire stream or for testing. This - value includes a single EOF. - """ - - raise NotImplementedError - - - def getSourceName(self): - """ - Where are you getting symbols from? Normally, implementations will - pass the buck all the way to the lexer who can ask its input stream - for the file name or whatever. - """ - - raise NotImplementedError - - -class CharStream(IntStream): - """ - @brief A source of characters for an ANTLR lexer. 
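The mark/rewind/release contract above is easiest to see against a concrete stream. A minimal sketch, assuming the antlr3 package is importable and using the ANTLRStringStream class defined later in this file; the input text is made up:

    from antlr3 import ANTLRStringStream

    stream = ANTLRStringStream(u'abc')
    stream.consume()                   # cursor now sits on 'b'

    marker = stream.mark()             # snapshot p/line/charPositionInLine
    stream.consume()                   # speculate ahead; LA(1) is ord('c')
    stream.rewind(marker)              # undo the speculation

    assert stream.LA(1) == ord('b')    # back where mark() was taken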
- - This is an abstract class that must be implemented by a subclass. - - """ - - # pylint does not realize that this is an interface, too - #pylint: disable-msg=W0223 - - EOF = -1 - - - def substring(self, start, stop): - """ - For infinite streams, you don't need this; primarily I'm providing - a useful interface for action code. Just make sure actions don't - use this on streams that don't support it. - """ - - raise NotImplementedError - - - def LT(self, i): - """ - Get the ith character of lookahead. This is the same usually as - LA(i). This will be used for labels in the generated - lexer code. I'd prefer to return a char here type-wise, but it's - probably better to be 32-bit clean and be consistent with LA. - """ - - raise NotImplementedError - - - def getLine(self): - """ANTLR tracks the line information automatically""" - - raise NotImplementedError - - - def setLine(self, line): - """ - Because this stream can rewind, we need to be able to reset the line - """ - - raise NotImplementedError - - - def getCharPositionInLine(self): - """ - The index of the character relative to the beginning of the line 0..n-1 - """ - - raise NotImplementedError - - - def setCharPositionInLine(self, pos): - raise NotImplementedError - - -class TokenStream(IntStream): - """ - - @brief A stream of tokens accessing tokens from a TokenSource - - This is an abstract class that must be implemented by a subclass. - - """ - - # pylint does not realize that this is an interface, too - #pylint: disable-msg=W0223 - - def LT(self, k): - """ - Get Token at current input pointer + i ahead where i=1 is next Token. - i<0 indicates tokens in the past. So -1 is previous token and -2 is - two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken. - Return null for LT(0) and any index that results in an absolute address - that is negative. - """ - - raise NotImplementedError - - - def range(self): - """ - How far ahead has the stream been asked to look? The return - value is a valid index from 0..n-1. - """ - - raise NotImplementedError - - - def get(self, i): - """ - Get a token at an absolute index i; 0..n-1. This is really only - needed for profiling and debugging and token stream rewriting. - If you don't want to buffer up tokens, then this method makes no - sense for you. Naturally you can't use the rewrite stream feature. - I believe DebugTokenStream can easily be altered to not use - this method, removing the dependency. - """ - - raise NotImplementedError - - - def getTokenSource(self): - """ - Where is this stream pulling tokens from? This is not the name, but - the object that provides Token objects. - """ - - raise NotImplementedError - - - def toString(self, start=None, stop=None): - """ - Return the text of all tokens from start to stop, inclusive. - If the stream does not buffer all the tokens then it can just - return "" or null; Users should not access $ruleLabel.text in - an action of course in that case. - - Because the user is not required to use a token with an index stored - in it, we must provide a means for two token objects themselves to - indicate the start/end location. Most often this will just delegate - to the other toString(int,int). This is also parallel with - the TreeNodeStream.toString(Object,Object). 
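A hedged sketch of the LT/LA contract above, assuming a grammar-generated lexer; the MyLexer module is hypothetical (any ANTLR 3 generated lexer would do):

    from antlr3 import ANTLRStringStream, CommonTokenStream
    from MyLexer import MyLexer                 # hypothetical generated lexer

    tokens = CommonTokenStream(MyLexer(ANTLRStringStream(u'a b c')))

    first = tokens.LT(1)                        # next on-channel token, not yet consumed
    tokens.consume()
    assert tokens.LT(-1) is first               # LT(-1) is the token just matched
    assert tokens.LA(1) == tokens.LT(1).type    # LA(i) is just LT(i).type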
- """ - - raise NotImplementedError - - -############################################################################ -# -# character streams for use in lexers -# CharStream -# \- ANTLRStringStream -# -############################################################################ - - -class ANTLRStringStream(CharStream): - """ - @brief CharStream that pull data from a unicode string. - - A pretty quick CharStream that pulls all data from an array - directly. Every method call counts in the lexer. - - """ - - - def __init__(self, data): - """ - @param data This should be a unicode string holding the data you want - to parse. If you pass in a byte string, the Lexer will choke on - non-ascii data. - - """ - - CharStream.__init__(self) - - # The data being scanned - self.strdata = unicode(data) - self.data = [ord(c) for c in self.strdata] - - # How many characters are actually in the buffer - self.n = len(data) - - # 0..n-1 index into string of next char - self.p = 0 - - # line number 1..n within the input - self.line = 1 - - # The index of the character relative to the beginning of the - # line 0..n-1 - self.charPositionInLine = 0 - - # A list of CharStreamState objects that tracks the stream state - # values line, charPositionInLine, and p that can change as you - # move through the input stream. Indexed from 0..markDepth-1. - self._markers = [ ] - self.lastMarker = None - self.markDepth = 0 - - # What is name or source of this char stream? - self.name = None - - - def reset(self): - """ - Reset the stream so that it's in the same state it was - when the object was created *except* the data array is not - touched. - """ - - self.p = 0 - self.line = 1 - self.charPositionInLine = 0 - self._markers = [ ] - - - def consume(self): - try: - if self.data[self.p] == 10: # \n - self.line += 1 - self.charPositionInLine = 0 - else: - self.charPositionInLine += 1 - - self.p += 1 - - except IndexError: - # happend when we reached EOF and self.data[self.p] fails - # just do nothing - pass - - - - def LA(self, i): - if i == 0: - return 0 # undefined - - if i < 0: - i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1] - - try: - return self.data[self.p+i-1] - except IndexError: - return EOF - - - - def LT(self, i): - if i == 0: - return 0 # undefined - - if i < 0: - i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1] - - try: - return self.strdata[self.p+i-1] - except IndexError: - return EOF - - - def index(self): - """ - Return the current input symbol index 0..n where n indicates the - last symbol has been read. The index is the index of char to - be returned from LA(1). - """ - - return self.p - - - def size(self): - return self.n - - - def mark(self): - state = (self.p, self.line, self.charPositionInLine) - try: - self._markers[self.markDepth] = state - except IndexError: - self._markers.append(state) - self.markDepth += 1 - - self.lastMarker = self.markDepth - - return self.lastMarker - - - def rewind(self, marker=None): - if marker is None: - marker = self.lastMarker - - p, line, charPositionInLine = self._markers[marker-1] - - self.seek(p) - self.line = line - self.charPositionInLine = charPositionInLine - self.release(marker) - - - def release(self, marker=None): - if marker is None: - marker = self.lastMarker - - self.markDepth = marker-1 - - - def seek(self, index): - """ - consume() ahead until p==index; can't just set p=index as we must - update line and charPositionInLine. 
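The consume()/LA() pair above carries all of the position bookkeeping. A self-contained check of the line/column updates described in the comments:

    from antlr3 import ANTLRStringStream

    stream = ANTLRStringStream(u'ab\ncd')
    assert stream.LA(1) == ord('a')    # LA() yields code points, LT() characters

    stream.consume()                   # past 'a'
    stream.consume()                   # past 'b'
    stream.consume()                   # past '\n': bumps line, resets column

    assert stream.line == 2 and stream.charPositionInLine == 0
    assert stream.LT(1) == u'c'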
- """ - - if index <= self.p: - self.p = index # just jump; don't update stream state (line, ...) - return - - # seek forward, consume until p hits index - while self.p < index: - self.consume() - - - def substring(self, start, stop): - return self.strdata[start:stop+1] - - - def getLine(self): - """Using setter/getter methods is deprecated. Use o.line instead.""" - return self.line - - - def getCharPositionInLine(self): - """ - Using setter/getter methods is deprecated. Use o.charPositionInLine - instead. - """ - return self.charPositionInLine - - - def setLine(self, line): - """Using setter/getter methods is deprecated. Use o.line instead.""" - self.line = line - - - def setCharPositionInLine(self, pos): - """ - Using setter/getter methods is deprecated. Use o.charPositionInLine - instead. - """ - self.charPositionInLine = pos - - - def getSourceName(self): - return self.name - - -class ANTLRFileStream(ANTLRStringStream): - """ - @brief CharStream that opens a file to read the data. - - This is a char buffer stream that is loaded from a file - all at once when you construct the object. - """ - - def __init__(self, fileName, encoding=None): - """ - @param fileName The path to the file to be opened. The file will be - opened with mode 'rb'. - - @param encoding If you set the optional encoding argument, then the - data will be decoded on the fly. - - """ - - self.fileName = fileName - - fp = codecs.open(fileName, 'rb', encoding) - try: - data = fp.read() - finally: - fp.close() - - ANTLRStringStream.__init__(self, data) - - - def getSourceName(self): - """Deprecated, access o.fileName directly.""" - - return self.fileName - - -class ANTLRInputStream(ANTLRStringStream): - """ - @brief CharStream that reads data from a file-like object. - - This is a char buffer stream that is loaded from a file like object - all at once when you construct the object. - - All input is consumed from the file, but it is not closed. - """ - - def __init__(self, file, encoding=None): - """ - @param file A file-like object holding your input. Only the read() - method must be implemented. - - @param encoding If you set the optional encoding argument, then the - data will be decoded on the fly. - - """ - - if encoding is not None: - # wrap input in a decoding reader - reader = codecs.lookup(encoding)[2] - file = reader(file) - - data = file.read() - - ANTLRStringStream.__init__(self, data) - - -# I guess the ANTLR prefix exists only to avoid a name clash with some Java -# mumbojumbo. A plain "StringStream" looks better to me, which should be -# the preferred name in Python. -StringStream = ANTLRStringStream -FileStream = ANTLRFileStream -InputStream = ANTLRInputStream - - -############################################################################ -# -# Token streams -# TokenStream -# +- CommonTokenStream -# \- TokenRewriteStream -# -############################################################################ - - -class CommonTokenStream(TokenStream): - """ - @brief The most common stream of tokens - - The most common stream of tokens is one where every token is buffered up - and tokens are prefiltered for a certain channel (the parser will only - see these tokens and cannot change the filter channel number during the - parse). - """ - - def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): - """ - @param tokenSource A TokenSource instance (usually a Lexer) to pull - the tokens from. - - @param channel Skip tokens on any channel but this one; this is how we - skip whitespace... 
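The three concrete character streams differ only in where the buffer comes from. A sketch; the file path mentioned in the comment is made up:

    from StringIO import StringIO
    from antlr3 import ANTLRStringStream, ANTLRInputStream

    s1 = ANTLRStringStream(u'x = 1\n')           # in-memory unicode text
    s2 = ANTLRInputStream(StringIO('x = 1\n'))   # anything with a read() method

    # ANTLRFileStream('input.txt', 'utf-8') reads and decodes a file the
    # same way; that path is hypothetical.

    assert s1.substring(0, 0) == u'x'            # note: stop index is inclusive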
- - """ - - TokenStream.__init__(self) - - self.tokenSource = tokenSource - - # Record every single token pulled from the source so we can reproduce - # chunks of it later. - self.tokens = [] - - # Map to override some Tokens' channel numbers - self.channelOverrideMap = {} - - # Set; discard any tokens with this type - self.discardSet = set() - - # Skip tokens on any channel but this one; this is how we skip - # whitespace... - self.channel = channel - - # By default, track all incoming tokens - self.discardOffChannelTokens = False - - # The index into the tokens list of the current token (next token - # to consume). p==-1 indicates that the tokens list is empty - self.p = -1 - - # Remember last marked position - self.lastMarker = None - - # how deep have we gone? - self._range = -1 - - - def makeEOFToken(self): - return self.tokenSource.makeEOFToken() - - - def setTokenSource(self, tokenSource): - """Reset this token stream by setting its token source.""" - - self.tokenSource = tokenSource - self.tokens = [] - self.p = -1 - self.channel = DEFAULT_CHANNEL - - - def reset(self): - self.p = 0 - self.lastMarker = None - - - def fillBuffer(self): - """ - Load all tokens from the token source and put in tokens. - This is done upon first LT request because you might want to - set some token type / channel overrides before filling buffer. - """ - - - index = 0 - t = self.tokenSource.nextToken() - while t is not None and t.type != EOF: - discard = False - - if self.discardSet is not None and t.type in self.discardSet: - discard = True - - elif self.discardOffChannelTokens and t.channel != self.channel: - discard = True - - # is there a channel override for token type? - try: - overrideChannel = self.channelOverrideMap[t.type] - - except KeyError: - # no override for this type - pass - - else: - if overrideChannel == self.channel: - t.channel = overrideChannel - else: - discard = True - - if not discard: - t.index = index - self.tokens.append(t) - index += 1 - - t = self.tokenSource.nextToken() - - # leave p pointing at first token on channel - self.p = 0 - self.p = self.skipOffTokenChannels(self.p) - - - def consume(self): - """ - Move the input pointer to the next incoming token. The stream - must become active with LT(1) available. consume() simply - moves the input pointer so that LT(1) points at the next - input symbol. Consume at least one token. - - Walk past any token not on the channel the parser is listening to. - """ - - if self.p < len(self.tokens): - self.p += 1 - - self.p = self.skipOffTokenChannels(self.p) # leave p on valid token - - - def skipOffTokenChannels(self, i): - """ - Given a starting index, return the index of the first on-channel - token. - """ - - try: - while self.tokens[i].channel != self.channel: - i += 1 - except IndexError: - # hit the end of token stream - pass - - return i - - - def skipOffTokenChannelsReverse(self, i): - while i >= 0 and self.tokens[i].channel != self.channel: - i -= 1 - - return i - - - def setTokenTypeChannel(self, ttype, channel): - """ - A simple filter mechanism whereby you can tell this token stream - to force all tokens of type ttype to be on channel. For example, - when interpreting, we cannot exec actions so we need to tell - the stream to force all WS and NEWLINE to be a different, ignored - channel. 
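The overrides set up through the attributes above only take effect before the buffer fills. A sketch; MyLexer and its WS/COMMENT token types are hypothetical, and HIDDEN is a made-up channel number:

    from antlr3 import ANTLRStringStream, CommonTokenStream
    from MyLexer import MyLexer, WS, COMMENT   # hypothetical generated module

    HIDDEN = 99                                # made-up off-parse channel

    tokens = CommonTokenStream(MyLexer(ANTLRStringStream(u'a /* note */ b')))

    # Both calls must happen before the first LT()/LA() triggers fillBuffer().
    # WS is forced onto HIDDEN; since that differs from the parse channel,
    # fillBuffer() drops those tokens from the buffer.
    tokens.setTokenTypeChannel(WS, HIDDEN)
    tokens.discardTokenType(COMMENT)           # drop comments outright

    print [t.text for t in tokens.getTokens() or []]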
- """ - - self.channelOverrideMap[ttype] = channel - - - def discardTokenType(self, ttype): - self.discardSet.add(ttype) - - - def getTokens(self, start=None, stop=None, types=None): - """ - Given a start and stop index, return a list of all tokens in - the token type set. Return None if no tokens were found. This - method looks at both on and off channel tokens. - """ - - if self.p == -1: - self.fillBuffer() - - if stop is None or stop > len(self.tokens): - stop = len(self.tokens) - - if start is None or stop < 0: - start = 0 - - if start > stop: - return None - - if isinstance(types, (int, long)): - # called with a single type, wrap into set - types = set([types]) - - filteredTokens = [ - token for token in self.tokens[start:stop] - if types is None or token.type in types - ] - - if len(filteredTokens) == 0: - return None - - return filteredTokens - - - def LT(self, k): - """ - Get the ith token from the current position 1..n where k=1 is the - first symbol of lookahead. - """ - - if self.p == -1: - self.fillBuffer() - - if k == 0: - return None - - if k < 0: - return self.LB(-k) - - i = self.p - n = 1 - # find k good tokens - while n < k: - # skip off-channel tokens - i = self.skipOffTokenChannels(i+1) # leave p on valid token - n += 1 - - if i > self._range: - self._range = i - - try: - return self.tokens[i] - except IndexError: - return self.makeEOFToken() - - - def LB(self, k): - """Look backwards k tokens on-channel tokens""" - - if self.p == -1: - self.fillBuffer() - - if k == 0: - return None - - if self.p - k < 0: - return None - - i = self.p - n = 1 - # find k good tokens looking backwards - while n <= k: - # skip off-channel tokens - i = self.skipOffTokenChannelsReverse(i-1) # leave p on valid token - n += 1 - - if i < 0: - return None - - return self.tokens[i] - - - def get(self, i): - """ - Return absolute token i; ignore which channel the tokens are on; - that is, count all tokens not just on-channel tokens. - """ - - return self.tokens[i] - - - def slice(self, start, stop): - if self.p == -1: - self.fillBuffer() - - if start < 0 or stop < 0: - return None - - return self.tokens[start:stop+1] - - - def LA(self, i): - return self.LT(i).type - - - def mark(self): - self.lastMarker = self.index() - return self.lastMarker - - - def release(self, marker=None): - # no resources to release - pass - - - def size(self): - return len(self.tokens) - - - def range(self): - return self._range - - - def index(self): - return self.p - - - def rewind(self, marker=None): - if marker is None: - marker = self.lastMarker - - self.seek(marker) - - - def seek(self, index): - self.p = index - - - def getTokenSource(self): - return self.tokenSource - - - def getSourceName(self): - return self.tokenSource.getSourceName() - - - def toString(self, start=None, stop=None): - if self.p == -1: - self.fillBuffer() - - if start is None: - start = 0 - elif not isinstance(start, int): - start = start.index - - if stop is None: - stop = len(self.tokens) - 1 - elif not isinstance(stop, int): - stop = stop.index - - if stop >= len(self.tokens): - stop = len(self.tokens) - 1 - - return ''.join([t.text for t in self.tokens[start:stop+1]]) - - -class RewriteOperation(object): - """@brief Internal helper class.""" - - def __init__(self, stream, index, text): - self.stream = stream - - # What index into rewrites List are we? - self.instructionIndex = None - - # Token buffer index. - self.index = index - self.text = text - - def execute(self, buf): - """Execute the rewrite operation by possibly adding to the buffer. 
-        Return the index of the next token to operate on.
-        """
-
-        return self.index
-
-    def toString(self):
-        opName = self.__class__.__name__
-        return '<%s@%d:"%s">' % (
-            opName, self.index, self.text)
-
-    __str__ = toString
-    __repr__ = toString
-
-
-class InsertBeforeOp(RewriteOperation):
-    """@brief Internal helper class."""
-
-    def execute(self, buf):
-        buf.write(self.text)
-        if self.stream.tokens[self.index].type != EOF:
-            buf.write(self.stream.tokens[self.index].text)
-        return self.index + 1
-
-
-class ReplaceOp(RewriteOperation):
-    """
-    @brief Internal helper class.
-
-    I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
-    instructions.
-    """
-
-    def __init__(self, stream, first, last, text):
-        RewriteOperation.__init__(self, stream, first, text)
-        self.lastIndex = last
-
-
-    def execute(self, buf):
-        if self.text is not None:
-            buf.write(self.text)
-
-        return self.lastIndex + 1
-
-
-    def toString(self):
-        if self.text is None:
-            return '<DeleteOp@%d..%d>' % (self.index, self.lastIndex)
-
-        return '<ReplaceOp@%d..%d:"%s">' % (
-            self.index, self.lastIndex, self.text)
-
-    __str__ = toString
-    __repr__ = toString
-
-
-class TokenRewriteStream(CommonTokenStream):
-    """@brief CommonTokenStream that can be modified.
-
-    Useful for dumping out the input stream after doing some
-    augmentation or other manipulations.
-
-    You can insert stuff, replace, and delete chunks. Note that the
-    operations are done lazily--only if you convert the buffer to a
-    String. This is very efficient because you are not moving data around
-    all the time. As the buffer of tokens is converted to strings, the
-    toString() method(s) check to see if there is an operation at the
-    current index. If so, the operation is done and then normal String
-    rendering continues on the buffer. This is like having multiple Turing
-    machine instruction streams (programs) operating on a single input tape. :)
-
-    Since the operations are done lazily at toString-time, operations do not
-    screw up the token index values. That is, an insert operation at token
-    index i does not change the index values for tokens i+1..n-1.
-
-    Because operations never actually alter the buffer, you may always get
-    the original token stream back without undoing anything. Since
-    the instructions are queued up, you can easily simulate transactions and
-    roll back any changes if there is an error just by removing instructions.
-    For example,
-
-      CharStream input = new ANTLRFileStream("input");
-      TLexer lex = new TLexer(input);
-      TokenRewriteStream tokens = new TokenRewriteStream(lex);
-      T parser = new T(tokens);
-      parser.startRule();
-
-    Then in the rules, you can execute
-      Token t,u;
-      ...
-      input.insertAfter(t, "text to put after t");}
-      input.insertAfter(u, "text after u");}
-      System.out.println(tokens.toString());
-
-    Actually, you have to cast the 'input' to a TokenRewriteStream. :(
-
-    You can also have multiple "instruction streams" and get multiple
-    rewrites from a single pass over the input. Just name the instruction
-    streams and use that name again when printing the buffer. This could be
-    useful for generating a C file and also its header file--all from the
-    same buffer:
-
-      tokens.insertAfter("pass1", t, "text to put after t");}
-      tokens.insertAfter("pass2", u, "text after u");}
-      System.out.println(tokens.toString("pass1"));
-      System.out.println(tokens.toString("pass2"));
-
-    If you don't use named rewrite streams, a "default" stream is used as
-    the first example shows.
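The Java fragments in this docstring translate almost one-for-one to this runtime. A sketch, with TLexer/TParser and startRule standing in for the generated classes and rule (hypothetical, as in the docstring) and the 'input' file name taken from the example:

    from antlr3 import ANTLRFileStream, TokenRewriteStream
    from TLexer import TLexer        # hypothetical generated lexer
    from TParser import TParser      # hypothetical generated parser

    lex = TLexer(ANTLRFileStream('input'))
    tokens = TokenRewriteStream(lex)
    parser = TParser(tokens)
    parser.startRule()               # rule name from the docstring example

    # In real code t and u would be token labels captured in rule actions.
    t = tokens.get(0)
    u = tokens.get(1)

    tokens.insertAfter(t, 'text to put after t')
    tokens.insertAfter(u, 'text after u')
    print tokens.toString()          # edits run lazily, only at render time

    # Named instruction streams keep independent rewrites of one buffer:
    tokens.insertAfter('pass1', t, 'text to put after t')
    print tokens.toString('pass1')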
- """ - - DEFAULT_PROGRAM_NAME = "default" - MIN_TOKEN_INDEX = 0 - - def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): - CommonTokenStream.__init__(self, tokenSource, channel) - - # You may have multiple, named streams of rewrite operations. - # I'm calling these things "programs." - # Maps String (name) -> rewrite (List) - self.programs = {} - self.programs[self.DEFAULT_PROGRAM_NAME] = [] - - # Map String (program name) -> Integer index - self.lastRewriteTokenIndexes = {} - - - def rollback(self, *args): - """ - Rollback the instruction stream for a program so that - the indicated instruction (via instructionIndex) is no - longer in the stream. UNTESTED! - """ - - if len(args) == 2: - programName = args[0] - instructionIndex = args[1] - elif len(args) == 1: - programName = self.DEFAULT_PROGRAM_NAME - instructionIndex = args[0] - else: - raise TypeError("Invalid arguments") - - p = self.programs.get(programName, None) - if p is not None: - self.programs[programName] = ( - p[self.MIN_TOKEN_INDEX:instructionIndex]) - - - def deleteProgram(self, programName=DEFAULT_PROGRAM_NAME): - """Reset the program so that no instructions exist""" - - self.rollback(programName, self.MIN_TOKEN_INDEX) - - - def insertAfter(self, *args): - if len(args) == 2: - programName = self.DEFAULT_PROGRAM_NAME - index = args[0] - text = args[1] - - elif len(args) == 3: - programName = args[0] - index = args[1] - text = args[2] - - else: - raise TypeError("Invalid arguments") - - if isinstance(index, Token): - # index is a Token, grap the stream index from it - index = index.index - - # to insert after, just insert before next index (even if past end) - self.insertBefore(programName, index+1, text) - - - def insertBefore(self, *args): - if len(args) == 2: - programName = self.DEFAULT_PROGRAM_NAME - index = args[0] - text = args[1] - - elif len(args) == 3: - programName = args[0] - index = args[1] - text = args[2] - - else: - raise TypeError("Invalid arguments") - - if isinstance(index, Token): - # index is a Token, grap the stream index from it - index = index.index - - op = InsertBeforeOp(self, index, text) - rewrites = self.getProgram(programName) - op.instructionIndex = len(rewrites) - rewrites.append(op) - - - def replace(self, *args): - if len(args) == 2: - programName = self.DEFAULT_PROGRAM_NAME - first = args[0] - last = args[0] - text = args[1] - - elif len(args) == 3: - programName = self.DEFAULT_PROGRAM_NAME - first = args[0] - last = args[1] - text = args[2] - - elif len(args) == 4: - programName = args[0] - first = args[1] - last = args[2] - text = args[3] - - else: - raise TypeError("Invalid arguments") - - if isinstance(first, Token): - # first is a Token, grap the stream index from it - first = first.index - - if isinstance(last, Token): - # last is a Token, grap the stream index from it - last = last.index - - if first > last or first < 0 or last < 0 or last >= len(self.tokens): - raise ValueError( - "replace: range invalid: %d..%d (size=%d)" - % (first, last, len(self.tokens))) - - op = ReplaceOp(self, first, last, text) - rewrites = self.getProgram(programName) - op.instructionIndex = len(rewrites) - rewrites.append(op) - - - def delete(self, *args): - self.replace(*(list(args) + [None])) - - - def getLastRewriteTokenIndex(self, programName=DEFAULT_PROGRAM_NAME): - return self.lastRewriteTokenIndexes.get(programName, -1) - - - def setLastRewriteTokenIndex(self, programName, i): - self.lastRewriteTokenIndexes[programName] = i - - - def getProgram(self, name): - p = self.programs.get(name, 
None) - if p is None: - p = self.initializeProgram(name) - - return p - - - def initializeProgram(self, name): - p = [] - self.programs[name] = p - return p - - - def toOriginalString(self, start=None, end=None): - if self.p == -1: - self.fillBuffer() - - if start is None: - start = self.MIN_TOKEN_INDEX - if end is None: - end = self.size() - 1 - - buf = StringIO() - i = start - while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens): - if self.get(i).type != EOF: - buf.write(self.get(i).text) - i += 1 - - return buf.getvalue() - - - def toString(self, *args): - if self.p == -1: - self.fillBuffer() - - if len(args) == 0: - programName = self.DEFAULT_PROGRAM_NAME - start = self.MIN_TOKEN_INDEX - end = self.size() - 1 - - elif len(args) == 1: - programName = args[0] - start = self.MIN_TOKEN_INDEX - end = self.size() - 1 - - elif len(args) == 2: - programName = self.DEFAULT_PROGRAM_NAME - start = args[0] - end = args[1] - - if start is None: - start = self.MIN_TOKEN_INDEX - elif not isinstance(start, int): - start = start.index - - if end is None: - end = len(self.tokens) - 1 - elif not isinstance(end, int): - end = end.index - - # ensure start/end are in range - if end >= len(self.tokens): - end = len(self.tokens) - 1 - - if start < 0: - start = 0 - - rewrites = self.programs.get(programName) - if rewrites is None or len(rewrites) == 0: - # no instructions to execute - return self.toOriginalString(start, end) - - buf = StringIO() - - # First, optimize instruction stream - indexToOp = self.reduceToSingleOperationPerIndex(rewrites) - - # Walk buffer, executing instructions and emitting tokens - i = start - while i <= end and i < len(self.tokens): - op = indexToOp.get(i) - # remove so any left have index size-1 - try: - del indexToOp[i] - except KeyError: - pass - - t = self.tokens[i] - if op is None: - # no operation at that index, just dump token - if t.type != EOF: - buf.write(t.text) - i += 1 # move to next token - - else: - i = op.execute(buf) # execute operation and skip - - # include stuff after end if it's last index in buffer - # So, if they did an insertAfter(lastValidIndex, "foo"), include - # foo if end==lastValidIndex. - if end == len(self.tokens) - 1: - # Scan any remaining operations after last token - # should be included (they will be inserts). - for i in sorted(indexToOp.keys()): - op = indexToOp[i] - if op.index >= len(self.tokens)-1: - buf.write(op.text) - - return buf.getvalue() - - __str__ = toString - - - def reduceToSingleOperationPerIndex(self, rewrites): - """ - We need to combine operations and report invalid operations (like - overlapping replaces that are not completed nested). Inserts to - same index need to be combined etc... Here are the cases: - - I.i.u I.j.v leave alone, nonoverlapping - I.i.u I.i.v combine: Iivu - - R.i-j.u R.x-y.v | i-j in x-y delete first R - R.i-j.u R.i-j.v delete first R - R.i-j.u R.x-y.v | x-y in i-j ERROR - R.i-j.u R.x-y.v | boundaries overlap ERROR - - Delete special case of replace (text==null): - D.i-j.u D.x-y.v | boundaries overlapcombine to - max(min)..max(right) - - I.i.u R.x-y.v | i in (x+1)-ydelete I (since - insert before we're not deleting - i) - I.i.u R.x-y.v | i not in (x+1)-yleave alone, - nonoverlapping - - R.x-y.v I.i.u | i in x-y ERROR - R.x-y.v I.x.u R.x-y.uv (combine, delete I) - R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping - - I.i.u = insert u before op @ index i - R.x-y.u = replace x-y indexed tokens with u - - First we need to examine replaces. For any replace op: - - 1. 
wipe out any insertions before op within that range. - 2. Drop any replace op before that is contained completely within - that range. - 3. Throw exception upon boundary overlap with any previous replace. - - Then we can deal with inserts: - - 1. for any inserts to same index, combine even if not adjacent. - 2. for any prior replace with same left boundary, combine this - insert with replace and delete this replace. - 3. throw exception if index in same range as previous replace - - Don't actually delete; make op null in list. Easier to walk list. - Later we can throw as we add to index -> op map. - - Note that I.2 R.2-2 will wipe out I.2 even though, technically, the - inserted stuff would be before the replace range. But, if you - add tokens in front of a method body '{' and then delete the method - body, I think the stuff before the '{' you added should disappear too. - - Return a map from token index to operation. - """ - - # WALK REPLACES - for i, rop in enumerate(rewrites): - if rop is None: - continue - - if not isinstance(rop, ReplaceOp): - continue - - # Wipe prior inserts within range - for j, iop in self.getKindOfOps(rewrites, InsertBeforeOp, i): - if iop.index == rop.index: - # E.g., insert before 2, delete 2..2; update replace - # text to include insert before, kill insert - rewrites[iop.instructionIndex] = None - rop.text = self.catOpText(iop.text, rop.text) - - elif iop.index > rop.index and iop.index <= rop.lastIndex: - # delete insert as it's a no-op. - rewrites[j] = None - - # Drop any prior replaces contained within - for j, prevRop in self.getKindOfOps(rewrites, ReplaceOp, i): - if (prevRop.index >= rop.index - and prevRop.lastIndex <= rop.lastIndex): - # delete replace as it's a no-op. - rewrites[j] = None - continue - - # throw exception unless disjoint or identical - disjoint = (prevRop.lastIndex < rop.index - or prevRop.index > rop.lastIndex) - same = (prevRop.index == rop.index - and prevRop.lastIndex == rop.lastIndex) - - # Delete special case of replace (text==null): - # D.i-j.u D.x-y.v| boundaries overlapcombine to - # max(min)..max(right) - if prevRop.text is None and rop.text is None and not disjoint: - # kill first delete - rewrites[prevRop.instructionIndex] = None - - rop.index = min(prevRop.index, rop.index) - rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex) - - elif not disjoint and not same: - raise ValueError( - "replace op boundaries of %s overlap with previous %s" - % (rop, prevRop)) - - # WALK INSERTS - for i, iop in enumerate(rewrites): - if iop is None: - continue - - if not isinstance(iop, InsertBeforeOp): - continue - - # combine current insert with prior if any at same index - for j, prevIop in self.getKindOfOps(rewrites, InsertBeforeOp, i): - if prevIop.index == iop.index: # combine objects - # convert to strings...we're in process of toString'ing - # whole token buffer so no lazy eval issue with any - # templates - iop.text = self.catOpText(iop.text, prevIop.text) - # delete redundant prior insert - rewrites[j] = None - - # look for replaces where iop.index is in range; error - for j, rop in self.getKindOfOps(rewrites, ReplaceOp, i): - if iop.index == rop.index: - rop.text = self.catOpText(iop.text, rop.text) - # delete current insert - rewrites[i] = None - continue - - if iop.index >= rop.index and iop.index <= rop.lastIndex: - raise ValueError( - "insert op %s within boundaries of previous %s" - % (iop, rop)) - - m = {} - for i, op in enumerate(rewrites): - if op is None: - # ignore deleted ops - continue - - assert op.index not in m, 
"should only be one op per index" - m[op.index] = op - - return m - - - def catOpText(self, a, b): - x = "" - y = "" - if a is not None: - x = a - if b is not None: - y = b - return x + y - - - def getKindOfOps(self, rewrites, kind, before=None): - """Get all operations before an index of a particular kind.""" - - if before is None: - before = len(rewrites) - elif before > len(rewrites): - before = len(rewrites) - - for i, op in enumerate(rewrites[:before]): - if op is None: - # ignore deleted - continue - if op.__class__ == kind: - yield i, op - - - def toDebugString(self, start=None, end=None): - if start is None: - start = self.MIN_TOKEN_INDEX - if end is None: - end = self.size() - 1 - - buf = StringIO() - i = start - while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens): - buf.write(self.get(i)) - i += 1 - - return buf.getvalue() diff --git a/thirdparty/antlr3/tokens.py b/thirdparty/antlr3/tokens.py deleted file mode 100644 index d3f39b8e4..000000000 --- a/thirdparty/antlr3/tokens.py +++ /dev/null @@ -1,418 +0,0 @@ -"""ANTLR3 runtime package""" - -# begin[licence] -# -# [The "BSD licence"] -# Copyright (c) 2005-2008 Terence Parr -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# end[licence] - -from antlr3.constants import EOF, DEFAULT_CHANNEL, INVALID_TOKEN_TYPE - -############################################################################ -# -# basic token interface -# -############################################################################ - -class Token(object): - """@brief Abstract token baseclass.""" - - def getText(self): - """@brief Get the text of the token. - - Using setter/getter methods is deprecated. Use o.text instead. - """ - raise NotImplementedError - - def setText(self, text): - """@brief Set the text of the token. - - Using setter/getter methods is deprecated. Use o.text instead. - """ - raise NotImplementedError - - - def getType(self): - """@brief Get the type of the token. - - Using setter/getter methods is deprecated. Use o.type instead.""" - - raise NotImplementedError - - def setType(self, ttype): - """@brief Get the type of the token. - - Using setter/getter methods is deprecated. 
Use o.type instead.""" - - raise NotImplementedError - - - def getLine(self): - """@brief Get the line number on which this token was matched - - Lines are numbered 1..n - - Using setter/getter methods is deprecated. Use o.line instead.""" - - raise NotImplementedError - - def setLine(self, line): - """@brief Set the line number on which this token was matched - - Using setter/getter methods is deprecated. Use o.line instead.""" - - raise NotImplementedError - - - def getCharPositionInLine(self): - """@brief Get the column of the tokens first character, - - Columns are numbered 0..n-1 - - Using setter/getter methods is deprecated. Use o.charPositionInLine instead.""" - - raise NotImplementedError - - def setCharPositionInLine(self, pos): - """@brief Set the column of the tokens first character, - - Using setter/getter methods is deprecated. Use o.charPositionInLine instead.""" - - raise NotImplementedError - - - def getChannel(self): - """@brief Get the channel of the token - - Using setter/getter methods is deprecated. Use o.channel instead.""" - - raise NotImplementedError - - def setChannel(self, channel): - """@brief Set the channel of the token - - Using setter/getter methods is deprecated. Use o.channel instead.""" - - raise NotImplementedError - - - def getTokenIndex(self): - """@brief Get the index in the input stream. - - An index from 0..n-1 of the token object in the input stream. - This must be valid in order to use the ANTLRWorks debugger. - - Using setter/getter methods is deprecated. Use o.index instead.""" - - raise NotImplementedError - - def setTokenIndex(self, index): - """@brief Set the index in the input stream. - - Using setter/getter methods is deprecated. Use o.index instead.""" - - raise NotImplementedError - - - def getInputStream(self): - """@brief From what character stream was this token created. - - You don't have to implement but it's nice to know where a Token - comes from if you have include files etc... on the input.""" - - raise NotImplementedError - - def setInputStream(self, input): - """@brief From what character stream was this token created. - - You don't have to implement but it's nice to know where a Token - comes from if you have include files etc... on the input.""" - - raise NotImplementedError - - -############################################################################ -# -# token implementations -# -# Token -# +- CommonToken -# \- ClassicToken -# -############################################################################ - -class CommonToken(Token): - """@brief Basic token implementation. - - This implementation does not copy the text from the input stream upon - creation, but keeps start/stop pointers into the stream to avoid - unnecessary copy operations. 
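The start/stop pointer scheme is directly observable. A self-contained sketch (token type 4 is arbitrary; real type numbers come from the generated recognizer):

    from antlr3 import ANTLRStringStream, CommonToken

    stream = ANTLRStringStream(u'hello world')

    # Nothing is copied here; the token only records [start, stop] offsets.
    tok = CommonToken(type=4, input=stream, start=6, stop=10)
    assert tok.text == u'world'      # substring() is evaluated on demand

    tok.text = u'WORLD'              # an explicit override wins from now on
    assert tok.text == u'WORLD'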
-
-    """
-
-    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
-                 input=None, start=None, stop=None, oldToken=None):
-        Token.__init__(self)
-
-        if oldToken is not None:
-            self.type = oldToken.type
-            self.line = oldToken.line
-            self.charPositionInLine = oldToken.charPositionInLine
-            self.channel = oldToken.channel
-            self.index = oldToken.index
-            self._text = oldToken._text
-            self.input = oldToken.input
-            if isinstance(oldToken, CommonToken):
-                self.start = oldToken.start
-                self.stop = oldToken.stop
-
-        else:
-            self.type = type
-            self.input = input
-            self.charPositionInLine = -1  # set to invalid position
-            self.line = 0
-            self.channel = channel
-
-            # What token number is this from 0..n-1 tokens; < 0 implies invalid index
-            self.index = -1
-
-            # We need to be able to change the text once in a while. If
-            # this is non-null, then getText should return this. Note that
-            # start/stop are not affected by changing this.
-            self._text = text
-
-            # The char position into the input buffer where this token starts
-            self.start = start
-
-            # The char position into the input buffer where this token stops
-            # This is the index of the last char, *not* the index after it!
-            self.stop = stop
-
-
-    def getText(self):
-        if self._text is not None:
-            return self._text
-
-        if self.input is None:
-            return None
-
-        if self.start < self.input.size() and self.stop < self.input.size():
-            return self.input.substring(self.start, self.stop)
-
-        return '<EOF>'
-
-
-    def setText(self, text):
-        """
-        Override the text for this token. getText() will return this text
-        rather than pulling from the buffer. Note that this does not mean
-        that start/stop indexes are not valid. It means that that input
-        was converted to a new string in the token object.
-        """
-        self._text = text
-
-    text = property(getText, setText)
-
-
-    def getType(self):
-        return self.type
-
-    def setType(self, ttype):
-        self.type = ttype
-
-    def getTypeName(self):
-        return str(self.type)
-
-    typeName = property(lambda s: s.getTypeName())
-
-    def getLine(self):
-        return self.line
-
-    def setLine(self, line):
-        self.line = line
-
-
-    def getCharPositionInLine(self):
-        return self.charPositionInLine
-
-    def setCharPositionInLine(self, pos):
-        self.charPositionInLine = pos
-
-
-    def getChannel(self):
-        return self.channel
-
-    def setChannel(self, channel):
-        self.channel = channel
-
-
-    def getTokenIndex(self):
-        return self.index
-
-    def setTokenIndex(self, index):
-        self.index = index
-
-
-    def getInputStream(self):
-        return self.input
-
-    def setInputStream(self, input):
-        self.input = input
-
-
-    def __str__(self):
-        if self.type == EOF:
-            return "<EOF>"
-
-        channelStr = ""
-        if self.channel > 0:
-            channelStr = ",channel=" + str(self.channel)
-
-        txt = self.text
-        if txt is not None:
-            txt = txt.replace("\n","\\n")
-            txt = txt.replace("\r","\\r")
-            txt = txt.replace("\t","\\t")
-        else:
-            txt = "<no text>"
-
-        return "[@%d,%d:%d=%r,<%s>%s,%d:%d]" % (
-            self.index,
-            self.start, self.stop,
-            txt,
-            self.typeName, channelStr,
-            self.line, self.charPositionInLine
-            )
-
-
-class ClassicToken(Token):
-    """@brief Alternative token implementation.
-
-    A Token object like we'd use in ANTLR 2.x; has an actual string created
-    and associated with this object. These objects are needed for imaginary
-    tree nodes that have payload objects. We need to create a Token object
-    that has a string; the tree node will point at this token. CommonToken
-    has indexes into a char stream and hence cannot be used to introduce
-    new strings.
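The difference matters as soon as you need an imaginary node. A sketch; BLOCK is a made-up imaginary token type constant:

    from antlr3.tokens import ClassicToken, CommonToken

    BLOCK = 200                      # made-up imaginary token type

    real = CommonToken(type=BLOCK)   # no stream, so no text to point at
    assert real.text is None

    # A ClassicToken owns its string outright, so a tree node built for an
    # imaginary BLOCK root still has something to print.
    imag = ClassicToken(type=BLOCK, text='BLOCK')
    assert imag.text == 'BLOCK'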
-    """
-
-    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
-                 oldToken=None
-                 ):
-        Token.__init__(self)
-
-        if oldToken is not None:
-            self.text = oldToken.text
-            self.type = oldToken.type
-            self.line = oldToken.line
-            self.charPositionInLine = oldToken.charPositionInLine
-            self.channel = oldToken.channel
-
-        self.text = text
-        self.type = type
-        self.line = None
-        self.charPositionInLine = None
-        self.channel = channel
-        self.index = None
-
-
-    def getText(self):
-        return self.text
-
-    def setText(self, text):
-        self.text = text
-
-
-    def getType(self):
-        return self.type
-
-    def setType(self, ttype):
-        self.type = ttype
-
-
-    def getLine(self):
-        return self.line
-
-    def setLine(self, line):
-        self.line = line
-
-
-    def getCharPositionInLine(self):
-        return self.charPositionInLine
-
-    def setCharPositionInLine(self, pos):
-        self.charPositionInLine = pos
-
-
-    def getChannel(self):
-        return self.channel
-
-    def setChannel(self, channel):
-        self.channel = channel
-
-
-    def getTokenIndex(self):
-        return self.index
-
-    def setTokenIndex(self, index):
-        self.index = index
-
-
-    def getInputStream(self):
-        return None
-
-    def setInputStream(self, input):
-        pass
-
-
-    def toString(self):
-        channelStr = ""
-        if self.channel > 0:
-            channelStr = ",channel=" + str(self.channel)
-
-        txt = self.text
-        if txt is None:
-            txt = "<no text>"
-
-        return "[@%r,%r,<%r>%s,%r:%r]" % (self.index,
-                                          txt,
-                                          self.type,
-                                          channelStr,
-                                          self.line,
-                                          self.charPositionInLine
-                                          )
-
-    __str__ = toString
-    __repr__ = toString
-
-
-INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)
-
-# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
-# will avoid creating a token for this symbol and try to fetch another.
-SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)
diff --git a/thirdparty/antlr3/tree.py b/thirdparty/antlr3/tree.py
deleted file mode 100644
index 7bc844673..000000000
--- a/thirdparty/antlr3/tree.py
+++ /dev/null
@@ -1,2843 +0,0 @@
-""" @package antlr3.tree
-@brief ANTLR3 runtime package, tree module
-
-This module contains all support classes for AST construction and tree parsers.
-
-"""
-
-# begin[licence]
-#
-# [The "BSD licence"]
-# Copyright (c) 2005-2008 Terence Parr
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 1. Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-# 3. The name of the author may not be used to endorse or promote products
-#    derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
-# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# end[licence] - -# lot's of docstrings are missing, don't complain for now... -# pylint: disable-msg=C0111 - -import re - -from antlr3.constants import UP, DOWN, EOF, INVALID_TOKEN_TYPE -from antlr3.recognizers import BaseRecognizer, RuleReturnScope -from antlr3.streams import IntStream -from antlr3.tokens import CommonToken, Token, INVALID_TOKEN -from antlr3.exceptions import MismatchedTreeNodeException, \ - MissingTokenException, UnwantedTokenException, MismatchedTokenException, \ - NoViableAltException - - -############################################################################ -# -# tree related exceptions -# -############################################################################ - - -class RewriteCardinalityException(RuntimeError): - """ - @brief Base class for all exceptions thrown during AST rewrite construction. - - This signifies a case where the cardinality of two or more elements - in a subrule are different: (ID INT)+ where |ID|!=|INT| - """ - - def __init__(self, elementDescription): - RuntimeError.__init__(self, elementDescription) - - self.elementDescription = elementDescription - - - def getMessage(self): - return self.elementDescription - - -class RewriteEarlyExitException(RewriteCardinalityException): - """@brief No elements within a (...)+ in a rewrite rule""" - - def __init__(self, elementDescription=None): - RewriteCardinalityException.__init__(self, elementDescription) - - -class RewriteEmptyStreamException(RewriteCardinalityException): - """ - @brief Ref to ID or expr but no tokens in ID stream or subtrees in expr stream - """ - - pass - - -############################################################################ -# -# basic Tree and TreeAdaptor interfaces -# -############################################################################ - -class Tree(object): - """ - @brief Abstract baseclass for tree nodes. - - What does a tree look like? ANTLR has a number of support classes - such as CommonTreeNodeStream that work on these kinds of trees. You - don't have to make your trees implement this interface, but if you do, - you'll be able to use more support code. - - NOTE: When constructing trees, ANTLR can build any kind of tree; it can - even use Token objects as trees if you add a child list to your tokens. - - This is a tree node without any payload; just navigation and factory stuff. 
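A minimal sketch of the navigation side of this interface, using CommonTree, the stock implementation provided further down in this module; the token types are made up:

    from antlr3.tokens import CommonToken
    from antlr3.tree import CommonTree

    PLUS, INT = 5, 6                 # made-up token types

    root = CommonTree(CommonToken(type=PLUS, text='+'))
    root.addChild(CommonTree(CommonToken(type=INT, text='1')))
    root.addChild(CommonTree(CommonToken(type=INT, text='2')))

    assert root.getChildCount() == 2
    assert root.getChild(0).getText() == '1'
    print root.toStringTree()        # -> (+ 1 2)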
- """ - - - def getChild(self, i): - raise NotImplementedError - - - def getChildCount(self): - raise NotImplementedError - - - def getParent(self): - """Tree tracks parent and child index now > 3.0""" - - raise NotImplementedError - - def setParent(self, t): - """Tree tracks parent and child index now > 3.0""" - - raise NotImplementedError - - - def hasAncestor(self, ttype): - """Walk upwards looking for ancestor with this token type.""" - - raise NotImplementedError - - def getAncestor(self, ttype): - """Walk upwards and get first ancestor with this token type.""" - - raise NotImplementedError - - def getAncestors(self): - """Return a list of all ancestors of this node. - - The first node of list is the root and the last is the parent of - this node. - """ - - raise NotImplementedError - - - def getChildIndex(self): - """This node is what child index? 0..n-1""" - - raise NotImplementedError - - def setChildIndex(self, index): - """This node is what child index? 0..n-1""" - - raise NotImplementedError - - - def freshenParentAndChildIndexes(self): - """Set the parent and child index values for all children""" - - raise NotImplementedError - - - def addChild(self, t): - """ - Add t as a child to this node. If t is null, do nothing. If t - is nil, add all children of t to this' children. - """ - - raise NotImplementedError - - - def setChild(self, i, t): - """Set ith child (0..n-1) to t; t must be non-null and non-nil node""" - - raise NotImplementedError - - - def deleteChild(self, i): - raise NotImplementedError - - - def replaceChildren(self, startChildIndex, stopChildIndex, t): - """ - Delete children from start to stop and replace with t even if t is - a list (nil-root tree). num of children can increase or decrease. - For huge child lists, inserting children can force walking rest of - children to set their childindex; could be slow. - """ - - raise NotImplementedError - - - def isNil(self): - """ - Indicates the node is a nil node but may still have children, meaning - the tree is a flat list. - """ - - raise NotImplementedError - - - def getTokenStartIndex(self): - """ - What is the smallest token index (indexing from 0) for this node - and its children? - """ - - raise NotImplementedError - - - def setTokenStartIndex(self, index): - raise NotImplementedError - - - def getTokenStopIndex(self): - """ - What is the largest token index (indexing from 0) for this node - and its children? - """ - - raise NotImplementedError - - - def setTokenStopIndex(self, index): - raise NotImplementedError - - - def dupNode(self): - raise NotImplementedError - - - def getType(self): - """Return a token type; needed for tree parsing.""" - - raise NotImplementedError - - - def getText(self): - raise NotImplementedError - - - def getLine(self): - """ - In case we don't have a token payload, what is the line for errors? - """ - - raise NotImplementedError - - - def getCharPositionInLine(self): - raise NotImplementedError - - - def toStringTree(self): - raise NotImplementedError - - - def toString(self): - raise NotImplementedError - - - -class TreeAdaptor(object): - """ - @brief Abstract baseclass for tree adaptors. - - How to create and navigate trees. Rather than have a separate factory - and adaptor, I've merged them. Makes sense to encapsulate. - - This takes the place of the tree construction code generated in the - generated code in 2.x and the ASTFactory. - - I do not need to know the type of a tree at all so they are all - generic Objects. This may increase the amount of typecasting needed. 
:( - """ - - # C o n s t r u c t i o n - - def createWithPayload(self, payload): - """ - Create a tree node from Token object; for CommonTree type trees, - then the token just becomes the payload. This is the most - common create call. - - Override if you want another kind of node to be built. - """ - - raise NotImplementedError - - - def dupNode(self, treeNode): - """Duplicate a single tree node. - - Override if you want another kind of node to be built.""" - - raise NotImplementedError - - - def dupTree(self, tree): - """Duplicate tree recursively, using dupNode() for each node""" - - raise NotImplementedError - - - def nil(self): - """ - Return a nil node (an empty but non-null node) that can hold - a list of element as the children. If you want a flat tree (a list) - use "t=adaptor.nil(); t.addChild(x); t.addChild(y);" - """ - - raise NotImplementedError - - - def errorNode(self, input, start, stop, exc): - """ - Return a tree node representing an error. This node records the - tokens consumed during error recovery. The start token indicates the - input symbol at which the error was detected. The stop token indicates - the last symbol consumed during recovery. - - You must specify the input stream so that the erroneous text can - be packaged up in the error node. The exception could be useful - to some applications; default implementation stores ptr to it in - the CommonErrorNode. - - This only makes sense during token parsing, not tree parsing. - Tree parsing should happen only when parsing and tree construction - succeed. - """ - - raise NotImplementedError - - - def isNil(self, tree): - """Is tree considered a nil node used to make lists of child nodes?""" - - raise NotImplementedError - - - def addChild(self, t, child): - """ - Add a child to the tree t. If child is a flat tree (a list), make all - in list children of t. Warning: if t has no children, but child does - and child isNil then you can decide it is ok to move children to t via - t.children = child.children; i.e., without copying the array. Just - make sure that this is consistent with have the user will build - ASTs. Do nothing if t or child is null. - """ - - raise NotImplementedError - - - def becomeRoot(self, newRoot, oldRoot): - """ - If oldRoot is a nil root, just copy or move the children to newRoot. - If not a nil root, make oldRoot a child of newRoot. - - old=^(nil a b c), new=r yields ^(r a b c) - old=^(a b c), new=r yields ^(r ^(a b c)) - - If newRoot is a nil-rooted single child tree, use the single - child as the new root node. - - old=^(nil a b c), new=^(nil r) yields ^(r a b c) - old=^(a b c), new=^(nil r) yields ^(r ^(a b c)) - - If oldRoot was null, it's ok, just return newRoot (even if isNil). - - old=null, new=r yields r - old=null, new=^(nil r) yields ^(nil r) - - Return newRoot. Throw an exception if newRoot is not a - simple node or nil root with a single child node--it must be a root - node. If newRoot is ^(nil x) return x as newRoot. - - Be advised that it's ok for newRoot to point at oldRoot's - children; i.e., you don't have to copy the list. We are - constructing these nodes so we should have this control for - efficiency. - """ - - raise NotImplementedError - - - def rulePostProcessing(self, root): - """ - Given the root of the subtree created for this rule, post process - it to do any simplifications or whatever you want. A required - behavior is to convert ^(nil singleSubtree) to singleSubtree - as the setting of start/stop indexes relies on a single non-nil root - for non-flat trees. 
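To illustrate the nil()/addChild() contract described above, here is a hypothetical sketch using the concrete CommonTreeAdaptor and CommonToken classes that appear further down in this module (token type 42 is arbitrary):

    adaptor = CommonTreeAdaptor()
    x = adaptor.createWithPayload(CommonToken(type=42, text="x"))
    y = adaptor.createWithPayload(CommonToken(type=42, text="y"))
    flat = adaptor.nil()       # empty but non-null "nil" node
    adaptor.addChild(flat, x)
    adaptor.addChild(flat, y)
    print flat.toStringTree()  # a flat list prints as: x y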
- - Flat trees such as for lists like "idlist : ID+ ;" are left alone - unless there is only one ID. For a list, the start/stop indexes - are set in the nil node. - - This method is executed after all rule tree construction and right - before setTokenBoundaries(). - """ - - raise NotImplementedError - - - def getUniqueID(self, node): - """For identifying trees. - - How to identify nodes so we can say "add node to a prior node"? - Even becomeRoot is an issue. Use System.identityHashCode(node) - usually. - """ - - raise NotImplementedError - - - # R e w r i t e R u l e s - - def createFromToken(self, tokenType, fromToken, text=None): - """ - Create a new node derived from a token, with a new token type and - (optionally) new text. - - This is invoked from an imaginary node ref on right side of a - rewrite rule as IMAG[$tokenLabel] or IMAG[$tokenLabel "IMAG"]. - - This should invoke createToken(Token). - """ - - raise NotImplementedError - - - def createFromType(self, tokenType, text): - """Create a new node derived from a token, with a new token type. - - This is invoked from an imaginary node ref on right side of a - rewrite rule as IMAG["IMAG"]. - - This should invoke createToken(int,String). - """ - - raise NotImplementedError - - - # C o n t e n t - - def getType(self, t): - """For tree parsing, I need to know the token type of a node""" - - raise NotImplementedError - - - def setType(self, t, type): - """Node constructors can set the type of a node""" - - raise NotImplementedError - - - def getText(self, t): - raise NotImplementedError - - def setText(self, t, text): - """Node constructors can set the text of a node""" - - raise NotImplementedError - - - def getToken(self, t): - """Return the token object from which this node was created. - - Currently used only for printing an error message. - The error display routine in BaseRecognizer needs to - display where the input the error occurred. If your - tree of limitation does not store information that can - lead you to the token, you can create a token filled with - the appropriate information and pass that back. See - BaseRecognizer.getErrorMessage(). - """ - - raise NotImplementedError - - - def setTokenBoundaries(self, t, startToken, stopToken): - """ - Where are the bounds in the input token stream for this node and - all children? Each rule that creates AST nodes will call this - method right before returning. Flat trees (i.e., lists) will - still usually have a nil root node just to hold the children list. - That node would contain the start/stop indexes then. - """ - - raise NotImplementedError - - - def getTokenStartIndex(self, t): - """ - Get the token start index for this subtree; return -1 if no such index - """ - - raise NotImplementedError - - - def getTokenStopIndex(self, t): - """ - Get the token stop index for this subtree; return -1 if no such index - """ - - raise NotImplementedError - - - # N a v i g a t i o n / T r e e P a r s i n g - - def getChild(self, t, i): - """Get a child 0..n-1 node""" - - raise NotImplementedError - - - def setChild(self, t, i, child): - """Set ith child (0..n-1) to t; t must be non-null and non-nil node""" - - raise NotImplementedError - - - def deleteChild(self, t, i): - """Remove ith child and shift children down from right.""" - - raise NotImplementedError - - - def getChildCount(self, t): - """How many children? If 0, then this is a leaf node""" - - raise NotImplementedError - - - def getParent(self, t): - """ - Who is the parent node of this node; if null, implies node is root. 
- If your node type doesn't handle this, it's ok but the tree rewrites - in tree parsers need this functionality. - """ - - raise NotImplementedError - - - def setParent(self, t, parent): - """ - Who is the parent node of this node; if null, implies node is root. - If your node type doesn't handle this, it's ok but the tree rewrites - in tree parsers need this functionality. - """ - - raise NotImplementedError - - - def getChildIndex(self, t): - """ - What index is this node in the child list? Range: 0..n-1 - If your node type doesn't handle this, it's ok but the tree rewrites - in tree parsers need this functionality. - """ - - raise NotImplementedError - - - def setChildIndex(self, t, index): - """ - What index is this node in the child list? Range: 0..n-1 - If your node type doesn't handle this, it's ok but the tree rewrites - in tree parsers need this functionality. - """ - - raise NotImplementedError - - - def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): - """ - Replace from start to stop child index of parent with t, which might - be a list. Number of children may be different - after this call. - - If parent is null, don't do anything; must be at root of overall tree. - Can't replace whatever points to the parent externally. Do nothing. - """ - - raise NotImplementedError - - - # Misc - - def create(self, *args): - """ - Deprecated, use createWithPayload, createFromToken or createFromType. - - This method only exists to mimic the Java interface of TreeAdaptor. - - """ - - if len(args) == 1 and isinstance(args[0], Token): - # Object create(Token payload); -## warnings.warn( -## "Using create() is deprecated, use createWithPayload()", -## DeprecationWarning, -## stacklevel=2 -## ) - return self.createWithPayload(args[0]) - - if (len(args) == 2 - and isinstance(args[0], (int, long)) - and isinstance(args[1], Token) - ): - # Object create(int tokenType, Token fromToken); -## warnings.warn( -## "Using create() is deprecated, use createFromToken()", -## DeprecationWarning, -## stacklevel=2 -## ) - return self.createFromToken(args[0], args[1]) - - if (len(args) == 3 - and isinstance(args[0], (int, long)) - and isinstance(args[1], Token) - and isinstance(args[2], basestring) - ): - # Object create(int tokenType, Token fromToken, String text); -## warnings.warn( -## "Using create() is deprecated, use createFromToken()", -## DeprecationWarning, -## stacklevel=2 -## ) - return self.createFromToken(args[0], args[1], args[2]) - - if (len(args) == 2 - and isinstance(args[0], (int, long)) - and isinstance(args[1], basestring) - ): - # Object create(int tokenType, String text); -## warnings.warn( -## "Using create() is deprecated, use createFromType()", -## DeprecationWarning, -## stacklevel=2 -## ) - return self.createFromType(args[0], args[1]) - - raise TypeError( - "No create method with this signature found: %s" - % (', '.join(type(v).__name__ for v in args)) - ) - - -############################################################################ -# -# base implementation of Tree and TreeAdaptor -# -# Tree -# \- BaseTree -# -# TreeAdaptor -# \- BaseTreeAdaptor -# -############################################################################ - - -class BaseTree(Tree): - """ - @brief A generic tree implementation with no payload. - - You must subclass to - actually have any user data. ANTLR v3 uses a list of children approach - instead of the child-sibling approach in v2. A flat tree (a list) is - an empty node whose children represent the list. 
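A sketch of the four create() dispatch forms just listed (token type 7 and the texts are made up; CommonTreeAdaptor, defined below, supplies the concrete createXxx implementations):

    adaptor = CommonTreeAdaptor()
    tok = CommonToken(type=7, text="x")
    n1 = adaptor.create(tok)          # -> createWithPayload(tok)
    n2 = adaptor.create(7, tok)       # -> createFromToken(7, tok)
    n3 = adaptor.create(7, tok, "y")  # -> createFromToken(7, tok, "y")
    n4 = adaptor.create(7, "y")       # -> createFromType(7, "y")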
An empty, but - non-null node is called "nil". - """ - - # BaseTree is abstract, no need to complain about not implemented abstract - # methods - # pylint: disable-msg=W0223 - - def __init__(self, node=None): - """ - Create a new node from an existing node does nothing for BaseTree - as there are no fields other than the children list, which cannot - be copied as the children are not considered part of this node. - """ - - Tree.__init__(self) - self.children = [] - self.parent = None - self.childIndex = 0 - - - def getChild(self, i): - try: - return self.children[i] - except IndexError: - return None - - - def getChildren(self): - """@brief Get the children internal List - - Note that if you directly mess with - the list, do so at your own risk. - """ - - # FIXME: mark as deprecated - return self.children - - - def getFirstChildWithType(self, treeType): - for child in self.children: - if child.getType() == treeType: - return child - - return None - - - def getChildCount(self): - return len(self.children) - - - def addChild(self, childTree): - """Add t as child of this node. - - Warning: if t has no children, but child does - and child isNil then this routine moves children to t via - t.children = child.children; i.e., without copying the array. - """ - - # this implementation is much simpler and probably less efficient - # than the mumbo-jumbo that Ter did for the Java runtime. - - if childTree is None: - return - - if childTree.isNil(): - # t is an empty node possibly with children - - if self.children is childTree.children: - raise ValueError("attempt to add child list to itself") - - # fix parent pointer and childIndex for new children - for idx, child in enumerate(childTree.children): - child.parent = self - child.childIndex = len(self.children) + idx - - self.children += childTree.children - - else: - # child is not nil (don't care about children) - self.children.append(childTree) - childTree.parent = self - childTree.childIndex = len(self.children) - 1 - - - def addChildren(self, children): - """Add all elements of kids list as children of this node""" - - self.children += children - - - def setChild(self, i, t): - if t is None: - return - - if t.isNil(): - raise ValueError("Can't set single child to a list") - - self.children[i] = t - t.parent = self - t.childIndex = i - - - def deleteChild(self, i): - killed = self.children[i] - - del self.children[i] - - # walk rest and decrement their child indexes - for idx, child in enumerate(self.children[i:]): - child.childIndex = i + idx - - return killed - - - def replaceChildren(self, startChildIndex, stopChildIndex, newTree): - """ - Delete children from start to stop and replace with t even if t is - a list (nil-root tree). num of children can increase or decrease. - For huge child lists, inserting children can force walking rest of - children to set their childindex; could be slow. 
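A sketch of the nil-merging behavior of addChild() above, using the CommonTree subclass defined later in this module (the token types 1 and 2 are arbitrary):

    root = CommonTree(CommonToken(type=1, text="root"))
    lst = CommonTree(None)      # a nil node, i.e. a flat list
    lst.addChild(CommonTree(CommonToken(type=2, text="a")))
    lst.addChild(CommonTree(CommonToken(type=2, text="b")))
    root.addChild(lst)          # lst is nil: its children move into root
    print root.getChildCount()  # prints: 2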
- """ - - if (startChildIndex >= len(self.children) - or stopChildIndex >= len(self.children) - ): - raise IndexError("indexes invalid") - - replacingHowMany = stopChildIndex - startChildIndex + 1 - - # normalize to a list of children to add: newChildren - if newTree.isNil(): - newChildren = newTree.children - - else: - newChildren = [newTree] - - replacingWithHowMany = len(newChildren) - delta = replacingHowMany - replacingWithHowMany - - - if delta == 0: - # if same number of nodes, do direct replace - for idx, child in enumerate(newChildren): - self.children[idx + startChildIndex] = child - child.parent = self - child.childIndex = idx + startChildIndex - - else: - # length of children changes... - - # ...delete replaced segment... - del self.children[startChildIndex:stopChildIndex+1] - - # ...insert new segment... - self.children[startChildIndex:startChildIndex] = newChildren - - # ...and fix indeces - self.freshenParentAndChildIndexes(startChildIndex) - - - def isNil(self): - return False - - - def freshenParentAndChildIndexes(self, offset=0): - for idx, child in enumerate(self.children[offset:]): - child.childIndex = idx + offset - child.parent = self - - - def sanityCheckParentAndChildIndexes(self, parent=None, i=-1): - if parent != self.parent: - raise ValueError( - "parents don't match; expected %r found %r" - % (parent, self.parent) - ) - - if i != self.childIndex: - raise ValueError( - "child indexes don't match; expected %d found %d" - % (i, self.childIndex) - ) - - for idx, child in enumerate(self.children): - child.sanityCheckParentAndChildIndexes(self, idx) - - - def getChildIndex(self): - """BaseTree doesn't track child indexes.""" - - return 0 - - - def setChildIndex(self, index): - """BaseTree doesn't track child indexes.""" - - pass - - - def getParent(self): - """BaseTree doesn't track parent pointers.""" - - return None - - def setParent(self, t): - """BaseTree doesn't track parent pointers.""" - - pass - - - def hasAncestor(self, ttype): - """Walk upwards looking for ancestor with this token type.""" - return self.getAncestor(ttype) is not None - - def getAncestor(self, ttype): - """Walk upwards and get first ancestor with this token type.""" - t = self.getParent() - while t is not None: - if t.getType() == ttype: - return t - t = t.getParent() - - return None - - def getAncestors(self): - """Return a list of all ancestors of this node. - - The first node of list is the root and the last is the parent of - this node. - """ - if selfgetParent() is None: - return None - - ancestors = [] - t = self.getParent() - while t is not None: - ancestors.insert(0, t) # insert at start - t = t.getParent() - - return ancestors - - - def toStringTree(self): - """Print out a whole tree not just a node""" - - if len(self.children) == 0: - return self.toString() - - buf = [] - if not self.isNil(): - buf.append('(') - buf.append(self.toString()) - buf.append(' ') - - for i, child in enumerate(self.children): - if i > 0: - buf.append(' ') - buf.append(child.toStringTree()) - - if not self.isNil(): - buf.append(')') - - return ''.join(buf) - - - def getLine(self): - return 0 - - - def getCharPositionInLine(self): - return 0 - - - def toString(self): - """Override to say how a node (not a tree) should look as text""" - - raise NotImplementedError - - - -class BaseTreeAdaptor(TreeAdaptor): - """ - @brief A TreeAdaptor that works with any Tree implementation. 
- """ - - # BaseTreeAdaptor is abstract, no need to complain about not implemented - # abstract methods - # pylint: disable-msg=W0223 - - def nil(self): - return self.createWithPayload(None) - - - def errorNode(self, input, start, stop, exc): - """ - create tree node that holds the start and stop tokens associated - with an error. - - If you specify your own kind of tree nodes, you will likely have to - override this method. CommonTree returns Token.INVALID_TOKEN_TYPE - if no token payload but you might have to set token type for diff - node type. - - You don't have to subclass CommonErrorNode; you will likely need to - subclass your own tree node class to avoid class cast exception. - """ - - return CommonErrorNode(input, start, stop, exc) - - - def isNil(self, tree): - return tree.isNil() - - - def dupTree(self, t, parent=None): - """ - This is generic in the sense that it will work with any kind of - tree (not just Tree interface). It invokes the adaptor routines - not the tree node routines to do the construction. - """ - - if t is None: - return None - - newTree = self.dupNode(t) - - # ensure new subtree root has parent/child index set - - # same index in new tree - self.setChildIndex(newTree, self.getChildIndex(t)) - - self.setParent(newTree, parent) - - for i in range(self.getChildCount(t)): - child = self.getChild(t, i) - newSubTree = self.dupTree(child, t) - self.addChild(newTree, newSubTree) - - return newTree - - - def addChild(self, tree, child): - """ - Add a child to the tree t. If child is a flat tree (a list), make all - in list children of t. Warning: if t has no children, but child does - and child isNil then you can decide it is ok to move children to t via - t.children = child.children; i.e., without copying the array. Just - make sure that this is consistent with have the user will build - ASTs. - """ - - #if isinstance(child, Token): - # child = self.createWithPayload(child) - - if tree is not None and child is not None: - tree.addChild(child) - - - def becomeRoot(self, newRoot, oldRoot): - """ - If oldRoot is a nil root, just copy or move the children to newRoot. - If not a nil root, make oldRoot a child of newRoot. - - old=^(nil a b c), new=r yields ^(r a b c) - old=^(a b c), new=r yields ^(r ^(a b c)) - - If newRoot is a nil-rooted single child tree, use the single - child as the new root node. - - old=^(nil a b c), new=^(nil r) yields ^(r a b c) - old=^(a b c), new=^(nil r) yields ^(r ^(a b c)) - - If oldRoot was null, it's ok, just return newRoot (even if isNil). - - old=null, new=r yields r - old=null, new=^(nil r) yields ^(nil r) - - Return newRoot. Throw an exception if newRoot is not a - simple node or nil root with a single child node--it must be a root - node. If newRoot is ^(nil x) return x as newRoot. - - Be advised that it's ok for newRoot to point at oldRoot's - children; i.e., you don't have to copy the list. We are - constructing these nodes so we should have this control for - efficiency. - """ - - if isinstance(newRoot, Token): - newRoot = self.create(newRoot) - - if oldRoot is None: - return newRoot - - if not isinstance(newRoot, CommonTree): - newRoot = self.createWithPayload(newRoot) - - # handle ^(nil real-node) - if newRoot.isNil(): - nc = newRoot.getChildCount() - if nc == 1: - newRoot = newRoot.getChild(0) - - elif nc > 1: - # TODO: make tree run time exceptions hierarchy - raise RuntimeError("more than one node as root") - - # add oldRoot to newRoot; addChild takes care of case where oldRoot - # is a flat list (i.e., nil-rooted tree). 
All children of oldRoot - # are added to newRoot. - newRoot.addChild(oldRoot) - return newRoot - - - def rulePostProcessing(self, root): - """Transform ^(nil x) to x and nil to null""" - - if root is not None and root.isNil(): - if root.getChildCount() == 0: - root = None - - elif root.getChildCount() == 1: - root = root.getChild(0) - # whoever invokes rule will set parent and child index - root.setParent(None) - root.setChildIndex(-1) - - return root - - - def createFromToken(self, tokenType, fromToken, text=None): - if fromToken is None: - return self.createFromType(tokenType, text) - - assert isinstance(tokenType, (int, long)), type(tokenType).__name__ - assert isinstance(fromToken, Token), type(fromToken).__name__ - assert text is None or isinstance(text, basestring), type(text).__name__ - - fromToken = self.createToken(fromToken) - fromToken.type = tokenType - if text is not None: - fromToken.text = text - t = self.createWithPayload(fromToken) - return t - - - def createFromType(self, tokenType, text): - assert isinstance(tokenType, (int, long)), type(tokenType).__name__ - assert isinstance(text, basestring) or text is None, type(text).__name__ - - fromToken = self.createToken(tokenType=tokenType, text=text) - t = self.createWithPayload(fromToken) - return t - - - def getType(self, t): - return t.getType() - - - def setType(self, t, type): - raise RuntimeError("don't know enough about Tree node") - - - def getText(self, t): - return t.getText() - - - def setText(self, t, text): - raise RuntimeError("don't know enough about Tree node") - - - def getChild(self, t, i): - return t.getChild(i) - - - def setChild(self, t, i, child): - t.setChild(i, child) - - - def deleteChild(self, t, i): - return t.deleteChild(i) - - - def getChildCount(self, t): - return t.getChildCount() - - - def getUniqueID(self, node): - return hash(node) - - - def createToken(self, fromToken=None, tokenType=None, text=None): - """ - Tell me how to create a token for use with imaginary token nodes. - For example, there is probably no input symbol associated with imaginary - token DECL, but you need to create it as a payload or whatever for - the DECL node as in ^(DECL type ID). - - If you care what the token payload objects' type is, you should - override this method and any other createToken variant. - """ - - raise NotImplementedError - - -############################################################################ -# -# common tree implementation -# -# Tree -# \- BaseTree -# \- CommonTree -# \- CommonErrorNode -# -# TreeAdaptor -# \- BaseTreeAdaptor -# \- CommonTreeAdaptor -# -############################################################################ - - -class CommonTree(BaseTree): - """@brief A tree node that is wrapper for a Token object. - - After 3.0 release - while building tree rewrite stuff, it became clear that computing - parent and child index is very difficult and cumbersome. Better to - spend the space in every tree node. If you don't want these extra - fields, it's easy to cut them out in your own BaseTree subclass. - - """ - - def __init__(self, payload): - BaseTree.__init__(self) - - # What token indexes bracket all tokens associated with this node - # and below? - self.startIndex = -1 - self.stopIndex = -1 - - # Who is the parent node of this node; if null, implies node is root - self.parent = None - - # What index is this node in the child list? 
Range: 0..n-1 - self.childIndex = -1 - - # A single token is the payload - if payload is None: - self.token = None - - elif isinstance(payload, CommonTree): - self.token = payload.token - self.startIndex = payload.startIndex - self.stopIndex = payload.stopIndex - - elif payload is None or isinstance(payload, Token): - self.token = payload - - else: - raise TypeError(type(payload).__name__) - - - - def getToken(self): - return self.token - - - def dupNode(self): - return CommonTree(self) - - - def isNil(self): - return self.token is None - - - def getType(self): - if self.token is None: - return INVALID_TOKEN_TYPE - - return self.token.getType() - - type = property(getType) - - - def getText(self): - if self.token is None: - return None - - return self.token.text - - text = property(getText) - - - def getLine(self): - if self.token is None or self.token.getLine() == 0: - if self.getChildCount(): - return self.getChild(0).getLine() - else: - return 0 - - return self.token.getLine() - - line = property(getLine) - - - def getCharPositionInLine(self): - if self.token is None or self.token.getCharPositionInLine() == -1: - if self.getChildCount(): - return self.getChild(0).getCharPositionInLine() - else: - return 0 - - else: - return self.token.getCharPositionInLine() - - charPositionInLine = property(getCharPositionInLine) - - - def getTokenStartIndex(self): - if self.startIndex == -1 and self.token is not None: - return self.token.getTokenIndex() - - return self.startIndex - - def setTokenStartIndex(self, index): - self.startIndex = index - - tokenStartIndex = property(getTokenStartIndex, setTokenStartIndex) - - - def getTokenStopIndex(self): - if self.stopIndex == -1 and self.token is not None: - return self.token.getTokenIndex() - - return self.stopIndex - - def setTokenStopIndex(self, index): - self.stopIndex = index - - tokenStopIndex = property(getTokenStopIndex, setTokenStopIndex) - - - def setUnknownTokenBoundaries(self): - """For every node in this subtree, make sure it's start/stop token's - are set. Walk depth first, visit bottom up. Only updates nodes - with at least one token index < 0. 
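A sketch of the payload wrapping just described (the type constant 4 is arbitrary):

    tok = CommonToken(type=4, text="x")
    node = CommonTree(tok)
    print node.getType(), node.getText()  # prints: 4 x
    copy = CommonTree(node)               # copies token and token indexes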
- """ - - if self.children is None: - if self.startIndex < 0 or self.stopIndex < 0: - self.startIndex = self.stopIndex = self.token.getTokenIndex() - - return - - for child in self.children: - child.setUnknownTokenBoundaries() - - if self.startIndex >= 0 and self.stopIndex >= 0: - # already set - return - - if self.children: - firstChild = self.children[0] - lastChild = self.children[-1] - self.startIndex = firstChild.getTokenStartIndex() - self.stopIndex = lastChild.getTokenStopIndex() - - - def getChildIndex(self): - #FIXME: mark as deprecated - return self.childIndex - - - def setChildIndex(self, idx): - #FIXME: mark as deprecated - self.childIndex = idx - - - def getParent(self): - #FIXME: mark as deprecated - return self.parent - - - def setParent(self, t): - #FIXME: mark as deprecated - self.parent = t - - - def toString(self): - if self.isNil(): - return "nil" - - if self.getType() == INVALID_TOKEN_TYPE: - return "" - - return self.token.text - - __str__ = toString - - - - def toStringTree(self): - if not self.children: - return self.toString() - - ret = '' - if not self.isNil(): - ret += '(%s ' % (self.toString()) - - ret += ' '.join([child.toStringTree() for child in self.children]) - - if not self.isNil(): - ret += ')' - - return ret - - -INVALID_NODE = CommonTree(INVALID_TOKEN) - - -class CommonErrorNode(CommonTree): - """A node representing erroneous token range in token stream""" - - def __init__(self, input, start, stop, exc): - CommonTree.__init__(self, None) - - if (stop is None or - (stop.getTokenIndex() < start.getTokenIndex() and - stop.getType() != EOF - ) - ): - # sometimes resync does not consume a token (when LT(1) is - # in follow set. So, stop will be 1 to left to start. adjust. - # Also handle case where start is the first token and no token - # is consumed during recovery; LT(-1) will return null. - stop = start - - self.input = input - self.start = start - self.stop = stop - self.trappedException = exc - - - def isNil(self): - return False - - - def getType(self): - return INVALID_TOKEN_TYPE - - - def getText(self): - if isinstance(self.start, Token): - i = self.start.getTokenIndex() - j = self.stop.getTokenIndex() - if self.stop.getType() == EOF: - j = self.input.size() - - badText = self.input.toString(i, j) - - elif isinstance(self.start, Tree): - badText = self.input.toString(self.start, self.stop) - - else: - # people should subclass if they alter the tree type so this - # next one is for sure correct. - badText = "" - - return badText - - - def toString(self): - if isinstance(self.trappedException, MissingTokenException): - return ("") - - elif isinstance(self.trappedException, UnwantedTokenException): - return ("") - - elif isinstance(self.trappedException, MismatchedTokenException): - return ("") - - elif isinstance(self.trappedException, NoViableAltException): - return ("") - - return "" - - -class CommonTreeAdaptor(BaseTreeAdaptor): - """ - @brief A TreeAdaptor that works with any Tree implementation. - - It provides - really just factory methods; all the work is done by BaseTreeAdaptor. - If you would like to have different tokens created than ClassicToken - objects, you need to override this and then set the parser tree adaptor to - use your subclass. - - To get your parser to build nodes of a different type, override - create(Token), errorNode(), and to be safe, YourTreeClass.dupNode(). - dupNode is called to duplicate nodes during rewrite operations. - """ - - def dupNode(self, treeNode): - """ - Duplicate a node. 
This is part of the factory; - override if you want another kind of node to be built. - - I could use reflection to prevent having to override this - but reflection is slow. - """ - - if treeNode is None: - return None - - return treeNode.dupNode() - - - def createWithPayload(self, payload): - return CommonTree(payload) - - - def createToken(self, fromToken=None, tokenType=None, text=None): - """ - Tell me how to create a token for use with imaginary token nodes. - For example, there is probably no input symbol associated with imaginary - token DECL, but you need to create it as a payload or whatever for - the DECL node as in ^(DECL type ID). - - If you care what the token payload objects' type is, you should - override this method and any other createToken variant. - """ - - if fromToken is not None: - return CommonToken(oldToken=fromToken) - - return CommonToken(type=tokenType, text=text) - - - def setTokenBoundaries(self, t, startToken, stopToken): - """ - Track start/stop token for subtree root created for a rule. - Only works with Tree nodes. For rules that match nothing, - seems like this will yield start=i and stop=i-1 in a nil node. - Might be useful info so I'll not force to be i..i. - """ - - if t is None: - return - - start = 0 - stop = 0 - - if startToken is not None: - start = startToken.index - - if stopToken is not None: - stop = stopToken.index - - t.setTokenStartIndex(start) - t.setTokenStopIndex(stop) - - - def getTokenStartIndex(self, t): - if t is None: - return -1 - return t.getTokenStartIndex() - - - def getTokenStopIndex(self, t): - if t is None: - return -1 - return t.getTokenStopIndex() - - - def getText(self, t): - if t is None: - return None - return t.getText() - - - def getType(self, t): - if t is None: - return INVALID_TOKEN_TYPE - - return t.getType() - - - def getToken(self, t): - """ - What is the Token associated with this node? If - you are not using CommonTree, then you must - override this in your own adaptor. - """ - - if isinstance(t, CommonTree): - return t.getToken() - - return None # no idea what to do - - - def getChild(self, t, i): - if t is None: - return None - return t.getChild(i) - - - def getChildCount(self, t): - if t is None: - return 0 - return t.getChildCount() - - - def getParent(self, t): - return t.getParent() - - - def setParent(self, t, parent): - t.setParent(parent) - - - def getChildIndex(self, t): - if t is None: - return 0 - return t.getChildIndex() - - - def setChildIndex(self, t, index): - t.setChildIndex(index) - - - def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): - if parent is not None: - parent.replaceChildren(startChildIndex, stopChildIndex, t) - - -############################################################################ -# -# streams -# -# TreeNodeStream -# \- BaseTree -# \- CommonTree -# -# TreeAdaptor -# \- BaseTreeAdaptor -# \- CommonTreeAdaptor -# -############################################################################ - - - -class TreeNodeStream(IntStream): - """@brief A stream of tree nodes - - It accessing nodes from a tree of some kind. - """ - - # TreeNodeStream is abstract, no need to complain about not implemented - # abstract methods - # pylint: disable-msg=W0223 - - def get(self, i): - """Get a tree node at an absolute index i; 0..n-1. - If you don't want to buffer up nodes, then this method makes no - sense for you. - """ - - raise NotImplementedError - - - def LT(self, k): - """ - Get tree node at current input pointer + i ahead where i=1 is next node. 
- i<0 indicates nodes in the past. So LT(-1) is previous node, but - implementations are not required to provide results for k < -1. - LT(0) is undefined. For i>=n, return null. - Return null for LT(0) and any index that results in an absolute address - that is negative. - - This is analogus to the LT() method of the TokenStream, but this - returns a tree node instead of a token. Makes code gen identical - for both parser and tree grammars. :) - """ - - raise NotImplementedError - - - def getTreeSource(self): - """ - Where is this stream pulling nodes from? This is not the name, but - the object that provides node objects. - """ - - raise NotImplementedError - - - def getTokenStream(self): - """ - If the tree associated with this stream was created from a TokenStream, - you can specify it here. Used to do rule $text attribute in tree - parser. Optional unless you use tree parser rule text attribute - or output=template and rewrite=true options. - """ - - raise NotImplementedError - - - def getTreeAdaptor(self): - """ - What adaptor can tell me how to interpret/navigate nodes and - trees. E.g., get text of a node. - """ - - raise NotImplementedError - - - def setUniqueNavigationNodes(self, uniqueNavigationNodes): - """ - As we flatten the tree, we use UP, DOWN nodes to represent - the tree structure. When debugging we need unique nodes - so we have to instantiate new ones. When doing normal tree - parsing, it's slow and a waste of memory to create unique - navigation nodes. Default should be false; - """ - - raise NotImplementedError - - - def reset(self): - """ - Reset the tree node stream in such a way that it acts like - a freshly constructed stream. - """ - - raise NotImplementedError - - - def toString(self, start, stop): - """ - Return the text of all nodes from start to stop, inclusive. - If the stream does not buffer all the nodes then it can still - walk recursively from start until stop. You can always return - null or "" too, but users should not access $ruleLabel.text in - an action of course in that case. - """ - - raise NotImplementedError - - - # REWRITING TREES (used by tree parser) - def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): - """ - Replace from start to stop child index of parent with t, which might - be a list. Number of children may be different - after this call. The stream is notified because it is walking the - tree and might need to know you are monkeying with the underlying - tree. Also, it might be able to modify the node stream to avoid - restreaming for future phases. - - If parent is null, don't do anything; must be at root of overall tree. - Can't replace whatever points to the parent externally. Do nothing. - """ - - raise NotImplementedError - - -class CommonTreeNodeStream(TreeNodeStream): - """@brief A buffered stream of tree nodes. - - Nodes can be from a tree of ANY kind. - - This node stream sucks all nodes out of the tree specified in - the constructor during construction and makes pointers into - the tree using an array of Object pointers. The stream necessarily - includes pointers to DOWN and UP and EOF nodes. - - This stream knows how to mark/release for backtracking. - - This stream is most suitable for tree interpreters that need to - jump around a lot or for tree parsers requiring speed (at cost of memory). - There is some duplicated functionality here with UnBufferedTreeNodeStream - but just in bookkeeping, not tree walking etc... 
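For instance, serializing the two-child tree ^(+ a b) produces the node sequence +, DOWN, a, b, UP. A sketch (token types 5, 6 and 7 are arbitrary; DOWN and UP are the constants imported at the top of this module, with values 2 and 3):

    adaptor = CommonTreeAdaptor()
    root = adaptor.createFromType(5, "+")
    adaptor.addChild(root, adaptor.createFromType(6, "a"))
    adaptor.addChild(root, adaptor.createFromType(7, "b"))
    stream = CommonTreeNodeStream(root)
    print str(stream)  # token types in stream order: 5 2 6 7 3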
-
-    @see UnBufferedTreeNodeStream
-    """
-
-    def __init__(self, *args):
-        TreeNodeStream.__init__(self)
-
-        if len(args) == 1:
-            adaptor = CommonTreeAdaptor()
-            tree = args[0]
-
-            nodes = None
-            down = None
-            up = None
-            eof = None
-
-        elif len(args) == 2:
-            adaptor = args[0]
-            tree = args[1]
-
-            nodes = None
-            down = None
-            up = None
-            eof = None
-
-        elif len(args) == 3:
-            parent = args[0]
-            start = args[1]
-            stop = args[2]
-
-            adaptor = parent.adaptor
-            tree = parent.root
-
-            nodes = parent.nodes[start:stop]
-            down = parent.down
-            up = parent.up
-            eof = parent.eof
-
-        else:
-            raise TypeError("Invalid arguments")
-
-        # all these navigation nodes are shared and hence they
-        # cannot contain any line/column info
-        if down is not None:
-            self.down = down
-        else:
-            self.down = adaptor.createFromType(DOWN, "DOWN")
-
-        if up is not None:
-            self.up = up
-        else:
-            self.up = adaptor.createFromType(UP, "UP")
-
-        if eof is not None:
-            self.eof = eof
-        else:
-            self.eof = adaptor.createFromType(EOF, "EOF")
-
-        # The complete mapping from stream index to tree node.
-        # This buffer includes pointers to DOWN, UP, and EOF nodes.
-        # It is built upon ctor invocation. The elements are type
-        # Object as we don't know what the trees look like.
-
-        # Load upon first need of the buffer so we can set token types
-        # of interest for reverseIndexing. Slows us down a wee bit to
-        # do all of the if p==-1 testing everywhere though.
-        if nodes is not None:
-            self.nodes = nodes
-        else:
-            self.nodes = []
-
-        # Pull nodes from which tree?
-        self.root = tree
-
-        # IF this tree (root) was created from a token stream, track it.
-        self.tokens = None
-
-        # What tree adaptor was used to build these trees
-        self.adaptor = adaptor
-
-        # Reuse same DOWN, UP navigation nodes unless this is true
-        self.uniqueNavigationNodes = False
-
-        # The index into the nodes list of the current node (next node
-        # to consume). If -1, nodes array not filled yet.
-        self.p = -1
-
-        # Track the last mark() call result value for use in rewind().
-        self.lastMarker = None
-
-        # Stack of indexes used for push/pop calls
-        self.calls = []
-
-
-    def __iter__(self):
-        return TreeIterator(self.root, self.adaptor)
-
-
-    def fillBuffer(self):
-        """Walk tree with depth-first-search and fill nodes buffer.
-        Don't do DOWN, UP nodes if it's a list (t is isNil).
-        """
-
-        self._fillBuffer(self.root)
-        self.p = 0  # buffer of nodes initialized now
-
-
-    def _fillBuffer(self, t):
-        nil = self.adaptor.isNil(t)
-
-        if not nil:
-            self.nodes.append(t)  # add this node
-
-        # add DOWN node if t has children
-        n = self.adaptor.getChildCount(t)
-        if not nil and n > 0:
-            self.addNavigationNode(DOWN)
-
-        # and now add all its children
-        for c in range(n):
-            self._fillBuffer(self.adaptor.getChild(t, c))
-
-        # add UP node if t has children
-        if not nil and n > 0:
-            self.addNavigationNode(UP)
-
-
-    def getNodeIndex(self, node):
-        """What is the stream index for node? 0..n-1
-        Return -1 if node not found.
-        """
-
-        if self.p == -1:
-            self.fillBuffer()
-
-        for i, t in enumerate(self.nodes):
-            if t == node:
-                return i
-
-        return -1
-
-
-    def addNavigationNode(self, ttype):
-        """
-        As we flatten the tree, we use UP, DOWN nodes to represent
-        the tree structure. When debugging we need unique nodes
-        so instantiate new ones when uniqueNavigationNodes is true.
- """ - - navNode = None - - if ttype == DOWN: - if self.hasUniqueNavigationNodes(): - navNode = self.adaptor.createFromType(DOWN, "DOWN") - - else: - navNode = self.down - - else: - if self.hasUniqueNavigationNodes(): - navNode = self.adaptor.createFromType(UP, "UP") - - else: - navNode = self.up - - self.nodes.append(navNode) - - - def get(self, i): - if self.p == -1: - self.fillBuffer() - - return self.nodes[i] - - - def LT(self, k): - if self.p == -1: - self.fillBuffer() - - if k == 0: - return None - - if k < 0: - return self.LB(-k) - - if self.p + k - 1 >= len(self.nodes): - return self.eof - - return self.nodes[self.p + k - 1] - - - def getCurrentSymbol(self): - return self.LT(1) - - - def LB(self, k): - """Look backwards k nodes""" - - if k == 0: - return None - - if self.p - k < 0: - return None - - return self.nodes[self.p - k] - - - def isEOF(self, obj): - return self.adaptor.getType(obj) == EOF - - - def getTreeSource(self): - return self.root - - - def getSourceName(self): - return self.getTokenStream().getSourceName() - - - def getTokenStream(self): - return self.tokens - - - def setTokenStream(self, tokens): - self.tokens = tokens - - - def getTreeAdaptor(self): - return self.adaptor - - - def hasUniqueNavigationNodes(self): - return self.uniqueNavigationNodes - - - def setUniqueNavigationNodes(self, uniqueNavigationNodes): - self.uniqueNavigationNodes = uniqueNavigationNodes - - - def consume(self): - if self.p == -1: - self.fillBuffer() - - self.p += 1 - - - def LA(self, i): - return self.adaptor.getType(self.LT(i)) - - - def mark(self): - if self.p == -1: - self.fillBuffer() - - - self.lastMarker = self.index() - return self.lastMarker - - - def release(self, marker=None): - # no resources to release - - pass - - - def index(self): - return self.p - - - def rewind(self, marker=None): - if marker is None: - marker = self.lastMarker - - self.seek(marker) - - - def seek(self, index): - if self.p == -1: - self.fillBuffer() - - self.p = index - - - def push(self, index): - """ - Make stream jump to a new location, saving old location. - Switch back with pop(). - """ - - self.calls.append(self.p) # save current index - self.seek(index) - - - def pop(self): - """ - Seek back to previous index saved during last push() call. - Return top of stack (return index). 
- """ - - ret = self.calls.pop(-1) - self.seek(ret) - return ret - - - def reset(self): - self.p = 0 - self.lastMarker = 0 - self.calls = [] - - - def size(self): - if self.p == -1: - self.fillBuffer() - - return len(self.nodes) - - - # TREE REWRITE INTERFACE - - def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): - if parent is not None: - self.adaptor.replaceChildren( - parent, startChildIndex, stopChildIndex, t - ) - - - def __str__(self): - """Used for testing, just return the token type stream""" - - if self.p == -1: - self.fillBuffer() - - return ' '.join([str(self.adaptor.getType(node)) - for node in self.nodes - ]) - - - def toString(self, start, stop): - if start is None or stop is None: - return None - - if self.p == -1: - self.fillBuffer() - - #System.out.println("stop: "+stop); - #if ( start instanceof CommonTree ) - # System.out.print("toString: "+((CommonTree)start).getToken()+", "); - #else - # System.out.println(start); - #if ( stop instanceof CommonTree ) - # System.out.println(((CommonTree)stop).getToken()); - #else - # System.out.println(stop); - - # if we have the token stream, use that to dump text in order - if self.tokens is not None: - beginTokenIndex = self.adaptor.getTokenStartIndex(start) - endTokenIndex = self.adaptor.getTokenStopIndex(stop) - - # if it's a tree, use start/stop index from start node - # else use token range from start/stop nodes - if self.adaptor.getType(stop) == UP: - endTokenIndex = self.adaptor.getTokenStopIndex(start) - - elif self.adaptor.getType(stop) == EOF: - endTokenIndex = self.size() -2 # don't use EOF - - return self.tokens.toString(beginTokenIndex, endTokenIndex) - - # walk nodes looking for start - i, t = 0, None - for i, t in enumerate(self.nodes): - if t == start: - break - - # now walk until we see stop, filling string buffer with text - buf = [] - t = self.nodes[i] - while t != stop: - text = self.adaptor.getText(t) - if text is None: - text = " " + self.adaptor.getType(t) - - buf.append(text) - i += 1 - t = self.nodes[i] - - # include stop node too - text = self.adaptor.getText(stop) - if text is None: - text = " " +self.adaptor.getType(stop) - - buf.append(text) - - return ''.join(buf) - - - ## iterator interface - def __iter__(self): - if self.p == -1: - self.fillBuffer() - - for node in self.nodes: - yield node - - -############################################################################# -# -# tree parser -# -############################################################################# - -class TreeParser(BaseRecognizer): - """@brief Baseclass for generated tree parsers. - - A parser for a stream of tree nodes. "tree grammars" result in a subclass - of this. All the error reporting and recovery is shared with Parser via - the BaseRecognizer superclass. 
- """ - - def __init__(self, input, state=None): - BaseRecognizer.__init__(self, state) - - self.input = None - self.setTreeNodeStream(input) - - - def reset(self): - BaseRecognizer.reset(self) # reset all recognizer state variables - if self.input is not None: - self.input.seek(0) # rewind the input - - - def setTreeNodeStream(self, input): - """Set the input stream""" - - self.input = input - - - def getTreeNodeStream(self): - return self.input - - - def getSourceName(self): - return self.input.getSourceName() - - - def getCurrentInputSymbol(self, input): - return input.LT(1) - - - def getMissingSymbol(self, input, e, expectedTokenType, follow): - tokenText = "" - adaptor = input.adaptor - return adaptor.createToken( - CommonToken(type=expectedTokenType, text=tokenText)) - - - # precompiled regex used by inContext - dotdot = ".*[^.]\\.\\.[^.].*" - doubleEtc = ".*\\.\\.\\.\\s+\\.\\.\\..*" - dotdotPattern = re.compile(dotdot) - doubleEtcPattern = re.compile(doubleEtc) - - def inContext(self, context, adaptor=None, tokenName=None, t=None): - """Check if current node in input has a context. - - Context means sequence of nodes towards root of tree. For example, - you might say context is "MULT" which means my parent must be MULT. - "CLASS VARDEF" says current node must be child of a VARDEF and whose - parent is a CLASS node. You can use "..." to mean zero-or-more nodes. - "METHOD ... VARDEF" means my parent is VARDEF and somewhere above - that is a METHOD node. The first node in the context is not - necessarily the root. The context matcher stops matching and returns - true when it runs out of context. There is no way to force the first - node to be the root. - """ - - return _inContext( - self.input.getTreeAdaptor(), self.getTokenNames(), - self.input.LT(1), context) - - @classmethod - def _inContext(cls, adaptor, tokenNames, t, context): - """The worker for inContext. - - It's static and full of parameters for testing purposes. - """ - - if cls.dotdotPattern.match(context): - # don't allow "..", must be "..." - raise ValueError("invalid syntax: ..") - - if cls.doubleEtcPattern.match(context): - # don't allow double "..." - raise ValueError("invalid syntax: ... ...") - - # ensure spaces around ... - context = context.replace("...", " ... ") - context = context.strip() - nodes = context.split() - - ni = len(nodes) - 1 - t = adaptor.getParent(t) - while ni >= 0 and t is not None: - if nodes[ni] == "...": - # walk upwards until we see nodes[ni-1] then continue walking - if ni == 0: - # ... at start is no-op - return True - goal = nodes[ni-1] - ancestor = cls._getAncestor(adaptor, tokenNames, t, goal) - if ancestor is None: - return False - t = ancestor - ni -= 1 - - name = tokenNames[adaptor.getType(t)] - if name != nodes[ni]: - return False - - # advance to parent and to previous element in context node list - ni -= 1 - t = adaptor.getParent(t) - - # at root but more nodes to match - if t is None and ni >= 0: - return False - - return True - - @staticmethod - def _getAncestor(adaptor, tokenNames, t, goal): - """Helper for static inContext.""" - while t is not None: - name = tokenNames[adaptor.getType(t)] - if name == goal: - return t - t = adaptor.getParent(t) - - return None - - - def matchAny(self, ignore): # ignore stream, copy of this.input - """ - Match '.' in tree parser has special meaning. Skip node or - entire tree if node has children. If children, scan until - corresponding UP node. 
- """ - - self._state.errorRecovery = False - - look = self.input.LT(1) - if self.input.getTreeAdaptor().getChildCount(look) == 0: - self.input.consume() # not subtree, consume 1 node and return - return - - # current node is a subtree, skip to corresponding UP. - # must count nesting level to get right UP - level = 0 - tokenType = self.input.getTreeAdaptor().getType(look) - while tokenType != EOF and not (tokenType == UP and level==0): - self.input.consume() - look = self.input.LT(1) - tokenType = self.input.getTreeAdaptor().getType(look) - if tokenType == DOWN: - level += 1 - - elif tokenType == UP: - level -= 1 - - self.input.consume() # consume UP - - - def mismatch(self, input, ttype, follow): - """ - We have DOWN/UP nodes in the stream that have no line info; override. - plus we want to alter the exception type. Don't try to recover - from tree parser errors inline... - """ - - raise MismatchedTreeNodeException(ttype, input) - - - def getErrorHeader(self, e): - """ - Prefix error message with the grammar name because message is - always intended for the programmer because the parser built - the input tree not the user. - """ - - return (self.getGrammarFileName() + - ": node from %sline %s:%s" - % (['', "after "][e.approximateLineInfo], - e.line, - e.charPositionInLine - ) - ) - - def getErrorMessage(self, e, tokenNames): - """ - Tree parsers parse nodes they usually have a token object as - payload. Set the exception token and do the default behavior. - """ - - if isinstance(self, TreeParser): - adaptor = e.input.getTreeAdaptor() - e.token = adaptor.getToken(e.node) - if e.token is not None: # could be an UP/DOWN node - e.token = CommonToken( - type=adaptor.getType(e.node), - text=adaptor.getText(e.node) - ) - - return BaseRecognizer.getErrorMessage(self, e, tokenNames) - - - def traceIn(self, ruleName, ruleIndex): - BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1)) - - - def traceOut(self, ruleName, ruleIndex): - BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1)) - - -############################################################################# -# -# tree visitor -# -############################################################################# - -class TreeVisitor(object): - """Do a depth first walk of a tree, applying pre() and post() actions - we go. - """ - - def __init__(self, adaptor=None): - if adaptor is not None: - self.adaptor = adaptor - else: - self.adaptor = CommonTreeAdaptor() - - def visit(self, t, pre_action=None, post_action=None): - """Visit every node in tree t and trigger an action for each node - before/after having visited all of its children. Bottom up walk. - Execute both actions even if t has no children. Ignore return - results from transforming children since they will have altered - the child list of this node (their parent). Return result of - applying post action to this node. - - The Python version differs from the Java version by taking two - callables 'pre_action' and 'post_action' instead of a class instance - that wraps those methods. Those callables must accept a TreeNode as - their single argument and return the (potentially transformed or - replaced) TreeNode. 
- """ - - isNil = self.adaptor.isNil(t) - if pre_action is not None and not isNil: - # if rewritten, walk children of new t - t = pre_action(t) - - idx = 0 - while idx < self.adaptor.getChildCount(t): - child = self.adaptor.getChild(t, idx) - self.visit(child, pre_action, post_action) - idx += 1 - - if post_action is not None and not isNil: - t = post_action(t) - - return t - -############################################################################# -# -# tree iterator -# -############################################################################# - -class TreeIterator(object): - """ - Return a node stream from a doubly-linked tree whose nodes - know what child index they are. - - Emit navigation nodes (DOWN, UP, and EOF) to let show tree structure. - """ - - def __init__(self, tree, adaptor=None): - if adaptor is None: - adaptor = CommonTreeAdaptor() - - self.root = tree - self.adaptor = adaptor - - self.first_time = True - self.tree = tree - - # If we emit UP/DOWN nodes, we need to spit out multiple nodes per - # next() call. - self.nodes = [] - - # navigation nodes to return during walk and at end - self.down = adaptor.createFromType(DOWN, "DOWN") - self.up = adaptor.createFromType(UP, "UP") - self.eof = adaptor.createFromType(EOF, "EOF") - - - def reset(self): - self.first_time = True - self.tree = self.root - self.nodes = [] - - - def __iter__(self): - return self - - - def has_next(self): - if self.first_time: - return self.root is not None - - if len(self.nodes) > 0: - return True - - if self.tree is None: - return False - - if self.adaptor.getChildCount(self.tree) > 0: - return True - - # back at root? - return self.adaptor.getParent(self.tree) is not None - - - def next(self): - if not self.has_next(): - raise StopIteration - - if self.first_time: - # initial condition - self.first_time = False - if self.adaptor.getChildCount(self.tree) == 0: - # single node tree (special) - self.nodes.append(self.eof) - return self.tree - - return self.tree - - # if any queued up, use those first - if len(self.nodes) > 0: - return self.nodes.pop(0) - - # no nodes left? - if self.tree is None: - return self.eof - - # next node will be child 0 if any children - if self.adaptor.getChildCount(self.tree) > 0: - self.tree = self.adaptor.getChild(self.tree, 0) - # real node is next after DOWN - self.nodes.append(self.tree) - return self.down - - # if no children, look for next sibling of tree or ancestor - parent = self.adaptor.getParent(self.tree) - # while we're out of siblings, keep popping back up towards root - while (parent is not None - and self.adaptor.getChildIndex(self.tree)+1 >= self.adaptor.getChildCount(parent)): - # we're moving back up - self.nodes.append(self.up) - self.tree = parent - parent = self.adaptor.getParent(self.tree) - - # no nodes left? - if parent is None: - self.tree = None # back at root? 
nothing left then - self.nodes.append(self.eof) # add to queue, might have UP nodes in there - return self.nodes.pop(0) - - # must have found a node with an unvisited sibling - # move to it and return it - nextSiblingIndex = self.adaptor.getChildIndex(self.tree) + 1 - self.tree = self.adaptor.getChild(parent, nextSiblingIndex) - self.nodes.append(self.tree) # add to queue, might have UP nodes in there - return self.nodes.pop(0) - - - -############################################################################# -# -# streams for rule rewriting -# -############################################################################# - -class RewriteRuleElementStream(object): - """@brief Internal helper class. - - A generic list of elements tracked in an alternative to be used in - a -> rewrite rule. We need to subclass to fill in the next() method, - which returns either an AST node wrapped around a token payload or - an existing subtree. - - Once you start next()ing, do not try to add more elements. It will - break the cursor tracking I believe. - - @see org.antlr.runtime.tree.RewriteRuleSubtreeStream - @see org.antlr.runtime.tree.RewriteRuleTokenStream - - TODO: add mechanism to detect/puke on modification after reading from - stream - """ - - def __init__(self, adaptor, elementDescription, elements=None): - # Cursor 0..n-1. If singleElement!=null, cursor is 0 until you next(), - # which bumps it to 1 meaning no more elements. - self.cursor = 0 - - # Track single elements w/o creating a list. Upon 2nd add, alloc list - self.singleElement = None - - # The list of tokens or subtrees we are tracking - self.elements = None - - # Once a node / subtree has been used in a stream, it must be dup'd - # from then on. Streams are reset after subrules so that the streams - # can be reused in future subrules. So, reset must set a dirty bit. - # If dirty, then next() always returns a dup. - self.dirty = False - - # The element or stream description; usually has name of the token or - # rule reference that this list tracks. Can include rulename too, but - # the exception would track that info. - self.elementDescription = elementDescription - - self.adaptor = adaptor - - if isinstance(elements, (list, tuple)): - # Create a stream, but feed off an existing list - self.singleElement = None - self.elements = elements - - else: - # Create a stream with one element - self.add(elements) - - - def reset(self): - """ - Reset the condition of this stream so that it appears we have - not consumed any of its elements. Elements themselves are untouched. - Once we reset the stream, any future use will need duplicates. Set - the dirty bit. - """ - - self.cursor = 0 - self.dirty = True - - - def add(self, el): - if el is None: - return - - if self.elements is not None: # if in list, just add - self.elements.append(el) - return - - if self.singleElement is None: # no elements yet, track w/o list - self.singleElement = el - return - - # adding 2nd element, move to list - self.elements = [] - self.elements.append(self.singleElement) - self.singleElement = None - self.elements.append(el) - - - def nextTree(self): - """ - Return the next element in the stream. If out of elements, throw - an exception unless size()==1. If size is 1, then return elements[0]. - - Return a duplicate node/subtree if stream is out of elements and - size==1. If we've already used the element, dup (dirty bit set). 
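Iterating over the ^(+ a b) tree from the node-stream sketch above yields the root, a DOWN marker, the children, an UP marker and finally EOF:

    for node in TreeIterator(root):
        print node.toString(),
    # prints: + DOWN a b UP EOF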
- """ - - if (self.dirty - or (self.cursor >= len(self) and len(self) == 1) - ): - # if out of elements and size is 1, dup - el = self._next() - return self.dup(el) - - # test size above then fetch - el = self._next() - return el - - - def _next(self): - """ - do the work of getting the next element, making sure that it's - a tree node or subtree. Deal with the optimization of single- - element list versus list of size > 1. Throw an exception - if the stream is empty or we're out of elements and size>1. - protected so you can override in a subclass if necessary. - """ - - if len(self) == 0: - raise RewriteEmptyStreamException(self.elementDescription) - - if self.cursor >= len(self): # out of elements? - if len(self) == 1: # if size is 1, it's ok; return and we'll dup - return self.toTree(self.singleElement) - - # out of elements and size was not 1, so we can't dup - raise RewriteCardinalityException(self.elementDescription) - - # we have elements - if self.singleElement is not None: - self.cursor += 1 # move cursor even for single element list - return self.toTree(self.singleElement) - - # must have more than one in list, pull from elements - o = self.toTree(self.elements[self.cursor]) - self.cursor += 1 - return o - - - def dup(self, el): - """ - When constructing trees, sometimes we need to dup a token or AST - subtree. Dup'ing a token means just creating another AST node - around it. For trees, you must call the adaptor.dupTree() unless - the element is for a tree root; then it must be a node dup. - """ - - raise NotImplementedError - - - def toTree(self, el): - """ - Ensure stream emits trees; tokens must be converted to AST nodes. - AST nodes can be passed through unmolested. - """ - - return el - - - def hasNext(self): - return ( (self.singleElement is not None and self.cursor < 1) - or (self.elements is not None - and self.cursor < len(self.elements) - ) - ) - - - def size(self): - if self.singleElement is not None: - return 1 - - if self.elements is not None: - return len(self.elements) - - return 0 - - __len__ = size - - - def getDescription(self): - """Deprecated. Directly access elementDescription attribute""" - - return self.elementDescription - - -class RewriteRuleTokenStream(RewriteRuleElementStream): - """@brief Internal helper class.""" - - def toTree(self, el): - # Don't convert to a tree unless they explicitly call nextTree. - # This way we can do hetero tree nodes in rewrite. - return el - - - def nextNode(self): - t = self._next() - return self.adaptor.createWithPayload(t) - - - def nextToken(self): - return self._next() - - - def dup(self, el): - raise TypeError("dup can't be called for a token stream.") - - -class RewriteRuleSubtreeStream(RewriteRuleElementStream): - """@brief Internal helper class.""" - - def nextNode(self): - """ - Treat next element as a single node even if it's a subtree. - This is used instead of next() when the result has to be a - tree root node. Also prevents us from duplicating recently-added - children; e.g., ^(type ID)+ adds ID to type and then 2nd iteration - must dup the type node, but ID has been added. - - Referencing a rule result twice is ok; dup entire tree as - we can't be adding trees as root; e.g., expr expr. - - Hideous code duplication here with super.next(). Can't think of - a proper way to refactor. This needs to always call dup node - and super.next() doesn't know which to call: dup node or dup tree. 
- """ - - if (self.dirty - or (self.cursor >= len(self) and len(self) == 1) - ): - # if out of elements and size is 1, dup (at most a single node - # since this is for making root nodes). - el = self._next() - return self.adaptor.dupNode(el) - - # test size above then fetch - el = self._next() - while self.adaptor.isNil(el) and self.adaptor.getChildCount(el) == 1: - el = self.adaptor.getChild(el, 0) - - # dup just the root (want node here) - return self.adaptor.dupNode(el) - - - def dup(self, el): - return self.adaptor.dupTree(el) - - - -class RewriteRuleNodeStream(RewriteRuleElementStream): - """ - Queues up nodes matched on left side of -> in a tree parser. This is - the analog of RewriteRuleTokenStream for normal parsers. - """ - - def nextNode(self): - return self._next() - - - def toTree(self, el): - return self.adaptor.dupNode(el) - - - def dup(self, el): - # we dup every node, so don't have to worry about calling dup; short- - #circuited next() so it doesn't call. - raise TypeError("dup can't be called for a node stream.") - - -class TreeRuleReturnScope(RuleReturnScope): - """ - This is identical to the ParserRuleReturnScope except that - the start property is a tree nodes not Token object - when you are parsing trees. To be generic the tree node types - have to be Object. - """ - - def __init__(self): - self.start = None - self.tree = None - - - def getStart(self): - return self.start - - - def getTree(self): - return self.tree diff --git a/thirdparty/antlr3/treewizard.py b/thirdparty/antlr3/treewizard.py deleted file mode 100644 index d96ce780f..000000000 --- a/thirdparty/antlr3/treewizard.py +++ /dev/null @@ -1,619 +0,0 @@ -""" @package antlr3.tree -@brief ANTLR3 runtime package, treewizard module - -A utility module to create ASTs at runtime. -See for an overview. Note that the API of the Python implementation is slightly different. - -""" - -# begin[licence] -# -# [The "BSD licence"] -# Copyright (c) 2005-2008 Terence Parr -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-#
-# end[licence]
-
-from antlr3.constants import INVALID_TOKEN_TYPE
-from antlr3.tokens import CommonToken
-from antlr3.tree import CommonTree, CommonTreeAdaptor
-
-
-def computeTokenTypes(tokenNames):
-    """
-    Compute a dict that is an inverted index of
-    tokenNames (which maps int token types to names).
-    """
-
-    if tokenNames is None:
-        return {}
-
-    return dict((name, type) for type, name in enumerate(tokenNames))
-
-
-## token types for pattern parser
-EOF = -1
-BEGIN = 1
-END = 2
-ID = 3
-ARG = 4
-PERCENT = 5
-COLON = 6
-DOT = 7
-
-class TreePatternLexer(object):
-    def __init__(self, pattern):
-        ## The tree pattern to lex like "(A B C)"
-        self.pattern = pattern
-
-        ## Index into input string
-        self.p = -1
-
-        ## Current char
-        self.c = None
-
-        ## How long is the pattern in chars?
-        self.n = len(pattern)
-
-        ## Set when token type is ID or ARG
-        self.sval = None
-
-        self.error = False
-
-        self.consume()
-
-
-    __idStartChar = frozenset(
-        'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
-        )
-    __idChar = __idStartChar | frozenset('0123456789')
-
-    def nextToken(self):
-        self.sval = ""
-        while self.c != EOF:
-            if self.c in (' ', '\n', '\r', '\t'):
-                self.consume()
-                continue
-
-            if self.c in self.__idStartChar:
-                self.sval += self.c
-                self.consume()
-                while self.c in self.__idChar:
-                    self.sval += self.c
-                    self.consume()
-
-                return ID
-
-            if self.c == '(':
-                self.consume()
-                return BEGIN
-
-            if self.c == ')':
-                self.consume()
-                return END
-
-            if self.c == '%':
-                self.consume()
-                return PERCENT
-
-            if self.c == ':':
-                self.consume()
-                return COLON
-
-            if self.c == '.':
-                self.consume()
-                return DOT
-
-            if self.c == '[':  # grab [x] as a string, returning x
-                self.consume()
-                while self.c != ']':
-                    if self.c == '\\':
-                        self.consume()
-                        if self.c != ']':
-                            self.sval += '\\'
-
-                        self.sval += self.c
-
-                    else:
-                        self.sval += self.c
-
-                    self.consume()
-
-                self.consume()
-                return ARG
-
-            self.consume()
-            self.error = True
-            return EOF
-
-        return EOF
-
-
-    def consume(self):
-        self.p += 1
-        if self.p >= self.n:
-            self.c = EOF
-
-        else:
-            self.c = self.pattern[self.p]
-
-
-class TreePatternParser(object):
-    def __init__(self, tokenizer, wizard, adaptor):
-        self.tokenizer = tokenizer
-        self.wizard = wizard
-        self.adaptor = adaptor
-        self.ttype = tokenizer.nextToken()  # kickstart
-
-
-    def pattern(self):
-        if self.ttype == BEGIN:
-            return self.parseTree()
-
-        elif self.ttype == ID:
-            node = self.parseNode()
-            if self.ttype == EOF:
-                return node
-
-            return None  # extra junk on end
-
-        return None
-
-
-    def parseTree(self):
-        if self.ttype != BEGIN:
-            return None
-
-        self.ttype = self.tokenizer.nextToken()
-        root = self.parseNode()
-        if root is None:
-            return None
-
-        while self.ttype in (BEGIN, ID, PERCENT, DOT):
-            if self.ttype == BEGIN:
-                subtree = self.parseTree()
-                self.adaptor.addChild(root, subtree)
-
-            else:
-                child = self.parseNode()
-                if child is None:
-                    return None
-
-                self.adaptor.addChild(root, child)
-
-        if self.ttype != END:
-            return None
-
-        self.ttype = self.tokenizer.nextToken()
-        return root
-
-
-    def parseNode(self):
-        # "%label:" prefix
-        label = None
-
-        if self.ttype == PERCENT:
-            self.ttype = self.tokenizer.nextToken()
-            if self.ttype != ID:
-                return None
-
-            label = self.tokenizer.sval
-            self.ttype = self.tokenizer.nextToken()
-            if self.ttype != COLON:
-                return None
-
-            self.ttype = self.tokenizer.nextToken()  # move to ID following colon
-
-        # Wildcard?
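-        # (editor's note) a lone '.' matches any node or subtree; e.g. in
-        # the pattern "(ASSIGN %lhs:ID %rhs:.)" the dot stands in for an
-        # arbitrary right-hand side.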
-        if self.ttype == DOT:
-            self.ttype = self.tokenizer.nextToken()
-            wildcardPayload = CommonToken(0, ".")
-            node = WildcardTreePattern(wildcardPayload)
-            if label is not None:
-                node.label = label
-            return node
-
-        # "ID" or "ID[arg]"
-        if self.ttype != ID:
-            return None
-
-        tokenName = self.tokenizer.sval
-        self.ttype = self.tokenizer.nextToken()
-
-        if tokenName == "nil":
-            return self.adaptor.nil()
-
-        text = tokenName
-        # check for arg
-        arg = None
-        if self.ttype == ARG:
-            arg = self.tokenizer.sval
-            text = arg
-            self.ttype = self.tokenizer.nextToken()
-
-        # create node
-        treeNodeType = self.wizard.getTokenType(tokenName)
-        if treeNodeType == INVALID_TOKEN_TYPE:
-            return None
-
-        node = self.adaptor.createFromType(treeNodeType, text)
-        if label is not None and isinstance(node, TreePattern):
-            node.label = label
-
-        if arg is not None and isinstance(node, TreePattern):
-            node.hasTextArg = True
-
-        return node
-
-
-class TreePattern(CommonTree):
-    """
-    When using %label:TOKENNAME in a tree for parse(), we must
-    track the label.
-    """
-
-    def __init__(self, payload):
-        CommonTree.__init__(self, payload)
-
-        self.label = None
-        self.hasTextArg = None
-
-
-    def toString(self):
-        if self.label is not None:
-            return '%' + self.label + ':' + CommonTree.toString(self)
-
-        else:
-            return CommonTree.toString(self)
-
-
-class WildcardTreePattern(TreePattern):
-    pass
-
-
-class TreePatternTreeAdaptor(CommonTreeAdaptor):
-    """This adaptor creates TreePattern objects for use during scan()"""
-
-    def createWithPayload(self, payload):
-        return TreePattern(payload)
-
-
-class TreeWizard(object):
-    """
-    Build and navigate trees with this object. It must know the names of
-    tokens, so you have to pass in a map or array of token names (from
-    which this class can build the map); i.e., the token DECL means nothing
-    unless the class can translate it to a token type.
-
-    In order to create nodes and navigate, this class needs a TreeAdaptor.
-
-    This class can build a token type -> node index for repeated use or for
-    iterating over the various nodes with a particular type.
-
-    This class works in conjunction with the TreeAdaptor rather than moving
-    all this functionality into the adaptor. An adaptor helps build and
-    navigate trees using methods. This class helps you do it with string
-    patterns like "(A B C)". You can create a tree from that pattern or
-    match subtrees against it.
-    """
-
-    def __init__(self, adaptor=None, tokenNames=None, typeMap=None):
-        if adaptor is None:
-            self.adaptor = CommonTreeAdaptor()
-
-        else:
-            self.adaptor = adaptor
-
-        if typeMap is None:
-            self.tokenNameToTypeMap = computeTokenTypes(tokenNames)
-
-        else:
-            if tokenNames is not None:
-                raise ValueError("Can't have both tokenNames and typeMap")
-
-            self.tokenNameToTypeMap = typeMap
-
-
-    def getTokenType(self, tokenName):
-        """Using the map of token names to token types, return the type."""
-
-        try:
-            return self.tokenNameToTypeMap[tokenName]
-        except KeyError:
-            return INVALID_TOKEN_TYPE
-
-
-    def create(self, pattern):
-        """
-        Create a tree or node from the indicated tree pattern that closely
-        follows ANTLR tree grammar tree element syntax:
-
-          (root child1 ... childN)
-
-        You can also just pass in a node: ID
-
-        Any node can have a text argument: ID[foo]
-        (notice there are no quotes around foo--it's clear it's a string).
-
-        nil is a special name meaning "give me a nil node". Useful for
-        making lists: (nil A B C) is a list of A B C.
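-
-        A minimal usage sketch (editor's illustration; the token names and
-        the leading placeholders for the reserved token types are
-        assumptions, so that user tokens start at type 4):
-
-            wizard = TreeWizard(tokenNames=['<invalid>', '<EOR>', '<DOWN>',
-                                            '<UP>', 'A', 'B', 'C'])
-            t = wizard.create("(A B C)")  # tree with root A, children B and C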
- """ - - tokenizer = TreePatternLexer(pattern) - parser = TreePatternParser(tokenizer, self, self.adaptor) - return parser.pattern() - - - def index(self, tree): - """Walk the entire tree and make a node name to nodes mapping. - - For now, use recursion but later nonrecursive version may be - more efficient. Returns a dict int -> list where the list is - of your AST node type. The int is the token type of the node. - """ - - m = {} - self._index(tree, m) - return m - - - def _index(self, t, m): - """Do the work for index""" - - if t is None: - return - - ttype = self.adaptor.getType(t) - elements = m.get(ttype) - if elements is None: - m[ttype] = elements = [] - - elements.append(t) - for i in range(self.adaptor.getChildCount(t)): - child = self.adaptor.getChild(t, i) - self._index(child, m) - - - def find(self, tree, what): - """Return a list of matching token. - - what may either be an integer specifzing the token type to find or - a string with a pattern that must be matched. - - """ - - if isinstance(what, (int, long)): - return self._findTokenType(tree, what) - - elif isinstance(what, basestring): - return self._findPattern(tree, what) - - else: - raise TypeError("'what' must be string or integer") - - - def _findTokenType(self, t, ttype): - """Return a List of tree nodes with token type ttype""" - - nodes = [] - - def visitor(tree, parent, childIndex, labels): - nodes.append(tree) - - self.visit(t, ttype, visitor) - - return nodes - - - def _findPattern(self, t, pattern): - """Return a List of subtrees matching pattern.""" - - subtrees = [] - - # Create a TreePattern from the pattern - tokenizer = TreePatternLexer(pattern) - parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor()) - tpattern = parser.pattern() - - # don't allow invalid patterns - if (tpattern is None or tpattern.isNil() - or isinstance(tpattern, WildcardTreePattern)): - return None - - rootTokenType = tpattern.getType() - - def visitor(tree, parent, childIndex, label): - if self._parse(tree, tpattern, None): - subtrees.append(tree) - - self.visit(t, rootTokenType, visitor) - - return subtrees - - - def visit(self, tree, what, visitor): - """Visit every node in tree matching what, invoking the visitor. - - If what is a string, it is parsed as a pattern and only matching - subtrees will be visited. - The implementation uses the root node of the pattern in combination - with visit(t, ttype, visitor) so nil-rooted patterns are not allowed. - Patterns with wildcard roots are also not allowed. - - If what is an integer, it is used as a token type and visit will match - all nodes of that type (this is faster than the pattern match). - The labels arg of the visitor action method is never set (it's None) - since using a token type rather than a pattern doesn't let us set a - label. - """ - - if isinstance(what, (int, long)): - self._visitType(tree, None, 0, what, visitor) - - elif isinstance(what, basestring): - self._visitPattern(tree, what, visitor) - - else: - raise TypeError("'what' must be string or integer") - - - def _visitType(self, t, parent, childIndex, ttype, visitor): - """Do the recursive work for visit""" - - if t is None: - return - - if self.adaptor.getType(t) == ttype: - visitor(t, parent, childIndex, None) - - for i in range(self.adaptor.getChildCount(t)): - child = self.adaptor.getChild(t, i) - self._visitType(child, t, i, ttype, visitor) - - - def _visitPattern(self, tree, pattern, visitor): - """ - For all subtrees that match the pattern, execute the visit action. 
- """ - - # Create a TreePattern from the pattern - tokenizer = TreePatternLexer(pattern) - parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor()) - tpattern = parser.pattern() - - # don't allow invalid patterns - if (tpattern is None or tpattern.isNil() - or isinstance(tpattern, WildcardTreePattern)): - return - - rootTokenType = tpattern.getType() - - def rootvisitor(tree, parent, childIndex, labels): - labels = {} - if self._parse(tree, tpattern, labels): - visitor(tree, parent, childIndex, labels) - - self.visit(tree, rootTokenType, rootvisitor) - - - def parse(self, t, pattern, labels=None): - """ - Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels - on the various nodes and '.' (dot) as the node/subtree wildcard, - return true if the pattern matches and fill the labels Map with - the labels pointing at the appropriate nodes. Return false if - the pattern is malformed or the tree does not match. - - If a node specifies a text arg in pattern, then that must match - for that node in t. - """ - - tokenizer = TreePatternLexer(pattern) - parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor()) - tpattern = parser.pattern() - - return self._parse(t, tpattern, labels) - - - def _parse(self, t1, tpattern, labels): - """ - Do the work for parse. Check to see if the tpattern fits the - structure and token types in t1. Check text if the pattern has - text arguments on nodes. Fill labels map with pointers to nodes - in tree matched against nodes in pattern with labels. - """ - - # make sure both are non-null - if t1 is None or tpattern is None: - return False - - # check roots (wildcard matches anything) - if not isinstance(tpattern, WildcardTreePattern): - if self.adaptor.getType(t1) != tpattern.getType(): - return False - - # if pattern has text, check node text - if (tpattern.hasTextArg - and self.adaptor.getText(t1) != tpattern.getText()): - return False - - if tpattern.label is not None and labels is not None: - # map label in pattern to node in t1 - labels[tpattern.label] = t1 - - # check children - n1 = self.adaptor.getChildCount(t1) - n2 = tpattern.getChildCount() - if n1 != n2: - return False - - for i in range(n1): - child1 = self.adaptor.getChild(t1, i) - child2 = tpattern.getChild(i) - if not self._parse(child1, child2, labels): - return False - - return True - - - def equals(self, t1, t2, adaptor=None): - """ - Compare t1 and t2; return true if token types/text, structure match - exactly. - The trees are examined in their entirety so that (A B) does not match - (A B C) nor (A (B C)). - """ - - if adaptor is None: - adaptor = self.adaptor - - return self._equals(t1, t2, adaptor) - - - def _equals(self, t1, t2, adaptor): - # make sure both are non-null - if t1 is None or t2 is None: - return False - - # check roots - if adaptor.getType(t1) != adaptor.getType(t2): - return False - - if adaptor.getText(t1) != adaptor.getText(t2): - return False - - # check children - n1 = adaptor.getChildCount(t1) - n2 = adaptor.getChildCount(t2) - if n1 != n2: - return False - - for i in range(n1): - child1 = adaptor.getChild(t1, i) - child2 = adaptor.getChild(t2, i) - if not self._equals(child1, child2, adaptor): - return False - - return True