Source code for lib.Parser

# EMACS settings: -*-	tab-width: 2; indent-tabs-mode: t; python-indent-offset: 2 -*-
# vim: tabstop=2:shiftwidth=2:noexpandtab
# kate: tab-width 2; replace-tabs off; indent-width 2;
#
# ==============================================================================
# Authors:          Patrick Lehmann
#
# Python Module:    TODO
#
# License:
# ==============================================================================
# Copyright 2007-2016 Patrick Lehmann - Dresden, Germany
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# load dependencies
from enum       import Enum


__api__ = [
	'ParserException',
	'MismatchingParserResult',
	'EmptyChoiseParserResult',
	'MatchingParserResult',
	'GreedyMatchingParserResult',
	'SourceCodePosition',
	'Token',
	'SuperToken',
	'ValuedToken',
	'StartOfDocumentToken',
	'CharacterToken',
	'SpaceToken',
	'DelimiterToken',
	'NumberToken',
	'StringToken',
	'Tokenizer'
]
__all__ = __api__


[docs]class ParserException(Exception): pass
[docs]class MismatchingParserResult(StopIteration): pass
[docs]class EmptyChoiseParserResult(MismatchingParserResult): pass
[docs]class MatchingParserResult(StopIteration): pass
[docs]class GreedyMatchingParserResult(MatchingParserResult): pass
[docs]class SourceCodePosition: def __init__(self, row, column, absolute): self.Row = row self.Column = column self.Absolute = absolute def __str__(self): return "(line: {0}, col: {1})".format(self.Row, self.Column)
[docs]class Token: def __init__(self, previousToken, start, end=None): previousToken.NextToken = self self._previousToken = previousToken self.NextToken = None self.Start = start self.End = end def __len__(self): return self.End.Absolute - self.Start.Absolute + 1 @property def PreviousToken(self): return self._previousToken @PreviousToken.setter def PreviousToken(self, value): self._previousToken = value value.NextToken = self # @property # def NextToken(self): # return self._nextToken # @NextToken.setter # def NextToken(self, value): # self._nextToken = value @property def Length(self): return len(self) def __str__(self): return repr(self) + " at " + str(self.Start)
[docs]class SuperToken(Token): def __init__(self, startToken, endToken=None): super().__init__(startToken.PreviousToken, startToken.Start, endToken.End if endToken else None) self.StartToken = startToken self.EndToken = endToken def __iter__(self): token = self.StartToken while (token is not self.EndToken): yield token token = token.NextToken yield self.EndToken
[docs]class ValuedToken(Token): def __init__(self, previousToken, value, start, end=None): super().__init__(previousToken, start, end) self.Value = value
[docs]class StartOfDocumentToken(ValuedToken): def __init__(self): self._previousToken = None self._nextToken = None self.Value = None self.Start = SourceCodePosition(1, 1, 1) self.End = None def __len__(self): return 0 def __str__(self): return "<StartOfDocumentToken>"
[docs]class CharacterToken(ValuedToken): def __init__(self, previousToken, value, start): if (len(value) != 1): raise ValueError() super().__init__(previousToken, value, start=start, end=start) def __len__(self): return 1 __CHARACTER_TRANSLATION__ = { "\r": "CR", "\n": "NL", "\t": "TAB", " ": "SPACE" } def __str__(self): return "<CharacterToken '{char}' at {line}:{col}>".format( char=self.__repr__(), pos=self.Start.Absolute, line=self.Start.Row, col=self.Start.Column) def __repr__(self): if (self.Value in self.__CHARACTER_TRANSLATION__): return self.__CHARACTER_TRANSLATION__[self.Value] else: return self.Value
[docs]class SpaceToken(ValuedToken): def __str__(self): return "<SpaceToken '{value}' at {line}:{col}>".format( value=self.Value, pos=self.Start.Absolute, line=self.Start.Row, col=self.Start.Column)
[docs]class DelimiterToken(ValuedToken): def __str__(self): return "<DelimiterToken '{value}' at {line}:{col}>".format( value=self.Value, pos=self.Start.Absolute, line=self.Start.Row, col=self.Start.Column)
[docs]class NumberToken(ValuedToken): def __str__(self): return "<NumberToken '{value}' at {line}:{col}>".format( value=self.Value, pos=self.Start.Absolute, line=self.Start.Row, col=self.Start.Column)
[docs]class StringToken(ValuedToken): def __str__(self): return "<StringToken '{value}' at {line}:{col}>".format( value=self.Value, pos=self.Start.Absolute, line=self.Start.Row, col=self.Start.Column)
[docs]class Tokenizer:
[docs] class TokenKind(Enum): SpaceChars = 0 AlphaChars = 1 NumberChars = 2 DelimiterChars = 3 OtherChars = 4
[docs] @staticmethod def GetCharacterTokenizer(iterable): previousToken = None absolute = 0 column = 0 row = 1 for char in iterable: absolute += 1 column += 1 previousToken = CharacterToken(previousToken, char, SourceCodePosition(row, column, absolute)) yield previousToken if (char == "\n"): column = 0 row += 1
__ALPHA_CHARS__ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" __NUMBER_CHARS__ = "0123456789" __SPACE_CHARS__ = " \t"
[docs] @classmethod def GetWordTokenizer(cls, iterable, alphaCharacters=__ALPHA_CHARS__, numberCharacters=__NUMBER_CHARS__, whiteSpaceCharacters=__SPACE_CHARS__): previousToken = StartOfDocumentToken() tokenKind = cls.TokenKind.OtherChars start = SourceCodePosition(1, 1, 1) buffer = "" absolute = 0 column = 0 row = 1 yield previousToken for char in iterable: absolute += 1 column += 1 if (tokenKind is cls.TokenKind.SpaceChars): if (char in whiteSpaceCharacters): buffer += char else: previousToken = SpaceToken(previousToken, buffer, start, SourceCodePosition(row, column, absolute)) yield previousToken start = SourceCodePosition(row, column, absolute) buffer = char if (char in alphaCharacters): tokenKind = cls.TokenKind.AlphaChars elif (char in numberCharacters): tokenKind = cls.TokenKind.NumberChars else: tokenKind = cls.TokenKind.OtherChars previousToken = CharacterToken(previousToken, char, start) yield previousToken elif (tokenKind is cls.TokenKind.AlphaChars): if (char in alphaCharacters): buffer += char else: previousToken = StringToken(previousToken, buffer, start, SourceCodePosition(row, column, absolute)) yield previousToken start = SourceCodePosition(row, column, absolute) buffer = char if (char in " \t"): tokenKind = cls.TokenKind.SpaceChars elif (char in numberCharacters): tokenKind = cls.TokenKind.NumberChars else: tokenKind = cls.TokenKind.OtherChars previousToken = CharacterToken(previousToken, char, start) yield previousToken elif (tokenKind is cls.TokenKind.NumberChars): if (char in numberCharacters): buffer += char else: previousToken = NumberToken(previousToken, buffer, start,SourceCodePosition(row, column, absolute)) yield previousToken start = SourceCodePosition(row, column, absolute) buffer = char if (char in " \t"): tokenKind = cls.TokenKind.SpaceChars elif (char in alphaCharacters): tokenKind = cls.TokenKind.AlphaChars else: tokenKind = cls.TokenKind.OtherChars previousToken = CharacterToken(previousToken, char, start) yield previousToken elif (tokenKind is cls.TokenKind.OtherChars): start = SourceCodePosition(row, column, absolute) buffer = char if (char in " \t"): tokenKind = cls.TokenKind.SpaceChars elif (char in alphaCharacters): tokenKind = cls.TokenKind.AlphaChars elif (char in numberCharacters): tokenKind = cls.TokenKind.NumberChars else: previousToken = CharacterToken(previousToken, char, start) yield previousToken else: raise ParserException("Unknown state.") if (char == "\n"): column = 0 row += 1
# end for