140 lines
3.4 KiB
Python
140 lines
3.4 KiB
Python
|
|
import re
|
|
|
|
|
|
class DLexToken:
|
|
"""
|
|
DLexToken contains:
|
|
'id' - the ID of the token (match with the value DLexer.AddToken returns).
|
|
'val' - the token's string.
|
|
'lineNumber' - the line the token was encountered on.
|
|
"""
|
|
pass
|
|
|
|
|
|
class DLexState:
|
|
pass
|
|
|
|
|
|
class DLexer:
|
|
"""
|
|
DLex is a simple lexer simulator. Here is how to use it.
|
|
|
|
1. Call AddToken to add the regular expressions that it will parse. Add them in
|
|
order of precedence. Store the value returned from AddToken so you can compare
|
|
it to the token ID returned by GetToken to determine what kind of token was found.
|
|
|
|
2. Call BeginRead or BeginReadFile to setup the initial file.
|
|
|
|
3. Repeatedly call GetToken.
|
|
If it returns None, then there are no more tokens that match your specifications.
|
|
If it returns a value, then it is a DLexToken with.
|
|
"""
|
|
|
|
def __init__( self, bSkipWhitespace=1 ):
|
|
self.__tokens = []
|
|
self.__curTokenID = 0
|
|
self.__notnewline = re.compile( '[^\\r\\n]*' )
|
|
|
|
self.__bSkipWhitespace = bSkipWhitespace
|
|
if bSkipWhitespace:
|
|
self.__whitespace = re.compile( '[ \\t\\f\\v]+' )
|
|
self.__newline = re.compile( '[\\r\\n]' )
|
|
|
|
|
|
def GetErrorTokenID( self ):
|
|
return -1
|
|
|
|
|
|
def AddToken( self, expr, flags=0 ):
|
|
tokenID = self.__curTokenID
|
|
self.__tokens.append( [tokenID, re.compile( expr, flags )] )
|
|
self.__curTokenID += 1
|
|
return tokenID
|
|
|
|
|
|
# Store and restore the state.
|
|
def BackupState( self ):
|
|
ret = DLexState()
|
|
ret.lineNumber = self.__lineNumber
|
|
ret.currentCharacter = self.__currentCharacter
|
|
ret.fileLen = self.__fileLen
|
|
return ret
|
|
|
|
def RestoreState( self, state ):
|
|
self.__lineNumber = state.lineNumber
|
|
self.__currentCharacter = state.currentCharacter
|
|
self.__fileLen = state.fileLen
|
|
|
|
|
|
def BeginRead( self, str ):
|
|
self.__curString = str
|
|
self.__lineNumber = 1
|
|
self.__currentCharacter = 0
|
|
self.__fileLen = len( str )
|
|
|
|
|
|
def BeginReadFile( self, fileName ):
|
|
file = open( fileName, 'r' )
|
|
self.BeginRead( file.read() )
|
|
file.close()
|
|
|
|
|
|
def GetToken( self ):
|
|
# Skip whitespace.
|
|
self.__SkipWhitespace()
|
|
|
|
# Now return the first token that we have a match for.
|
|
for token in self.__tokens:
|
|
m = token[1].match( self.__curString, self.__currentCharacter )
|
|
if m:
|
|
ret = DLexToken()
|
|
ret.id = token[0]
|
|
ret.val = self.__curString[ m.start() : m.end() ]
|
|
ret.lineNumber = self.__lineNumber
|
|
self.__currentCharacter = m.end()
|
|
return ret
|
|
|
|
if self.__currentCharacter < self.__fileLen:
|
|
print "NO MATCH FOR '%s'" % self.__curString[ self.__currentCharacter : self.__currentCharacter+35 ]
|
|
ret = DLexToken()
|
|
ret.id = self.GetErrorTokenID()
|
|
ret.val = self.__curString[ self.__currentCharacter : ]
|
|
self.__currentCharacter = self.__fileLen
|
|
return ret
|
|
#print "%d" % t
|
|
|
|
return None
|
|
|
|
|
|
def GetLineNumber( self ):
|
|
return self.__lineNumber
|
|
|
|
|
|
def GetPercentComplete( self ):
|
|
return (self.__currentCharacter * 100) / self.__fileLen
|
|
|
|
|
|
def GetLineContents( self ):
|
|
m = self.__notnewline.match( self.__curString, self.__currentCharacter )
|
|
if m:
|
|
return self.__curString[ m.start() : m.end() ]
|
|
else:
|
|
return ""
|
|
|
|
|
|
def __SkipWhitespace( self ):
|
|
if self.__bSkipWhitespace:
|
|
while 1:
|
|
a = self.__whitespace.match( self.__curString, self.__currentCharacter )
|
|
b = self.__newline.match( self.__curString, self.__currentCharacter )
|
|
if a:
|
|
self.__currentCharacter = a.end()
|
|
continue
|
|
elif b:
|
|
self.__currentCharacter = b.end()
|
|
self.__lineNumber += 1
|
|
continue
|
|
else:
|
|
break
|
|
|