268 lines
7.8 KiB
Python
268 lines
7.8 KiB
Python
"""SourceReader class for Cheetah's Parser and CodeGenerator
|
|
"""
|
|
import re
|
|
import sys
|
|
|
|
EOLre = re.compile(r'[ \f\t]*(?:\r\n|\r|\n)')
|
|
EOLZre = re.compile(r'(?:\r\n|\r|\n|\Z)')
|
|
ENCODINGsearch = re.compile("coding[=:]\s*([-\w.]+)").search
|
|
|
|
class Error(Exception):
|
|
pass
|
|
|
|
class SourceReader(object):
|
|
def __init__(self, src, filename=None, breakPoint=None, encoding=None):
|
|
self._src = src
|
|
self._filename = filename
|
|
self._srcLen = len(src)
|
|
if breakPoint == None:
|
|
self._breakPoint = self._srcLen
|
|
else:
|
|
self.setBreakPoint(breakPoint)
|
|
self._pos = 0
|
|
self._bookmarks = {}
|
|
self._posTobookmarkMap = {}
|
|
|
|
## collect some meta-information
|
|
self._EOLs = []
|
|
pos = 0
|
|
while pos < len(self):
|
|
EOLmatch = EOLZre.search(src, pos)
|
|
self._EOLs.append(EOLmatch.start())
|
|
pos = EOLmatch.end()
|
|
|
|
self._BOLs = []
|
|
for pos in self._EOLs:
|
|
BOLpos = self.findBOL(pos)
|
|
self._BOLs.append(BOLpos)
|
|
|
|
def src(self):
|
|
return self._src
|
|
|
|
def filename(self):
|
|
return self._filename
|
|
|
|
def __len__(self):
|
|
return self._breakPoint
|
|
|
|
def __getitem__(self, i):
|
|
if not isinstance(i, int):
|
|
self.checkPos(i.stop)
|
|
else:
|
|
self.checkPos(i)
|
|
return self._src[i]
|
|
|
|
def __getslice__(self, i, j):
|
|
i = max(i, 0); j = max(j, 0)
|
|
return self._src[i:j]
|
|
|
|
def splitlines(self):
|
|
if not hasattr(self, '_srcLines'):
|
|
self._srcLines = self._src.splitlines()
|
|
return self._srcLines
|
|
|
|
def lineNum(self, pos=None):
|
|
if pos == None:
|
|
pos = self._pos
|
|
|
|
for i in range(len(self._BOLs)):
|
|
if pos >= self._BOLs[i] and pos <= self._EOLs[i]:
|
|
return i
|
|
|
|
def getRowCol(self, pos=None):
|
|
if pos == None:
|
|
pos = self._pos
|
|
lineNum = self.lineNum(pos)
|
|
BOL, EOL = self._BOLs[lineNum], self._EOLs[lineNum]
|
|
return lineNum+1, pos-BOL+1
|
|
|
|
def getRowColLine(self, pos=None):
|
|
if pos == None:
|
|
pos = self._pos
|
|
row, col = self.getRowCol(pos)
|
|
return row, col, self.splitlines()[row-1]
|
|
|
|
def getLine(self, pos):
|
|
if pos == None:
|
|
pos = self._pos
|
|
lineNum = self.lineNum(pos)
|
|
return self.splitlines()[lineNum]
|
|
|
|
def pos(self):
|
|
return self._pos
|
|
|
|
def setPos(self, pos):
|
|
self.checkPos(pos)
|
|
self._pos = pos
|
|
|
|
|
|
def validPos(self, pos):
|
|
return pos <= self._breakPoint and pos >=0
|
|
|
|
def checkPos(self, pos):
|
|
if not pos <= self._breakPoint:
|
|
raise Error("pos (" + str(pos) + ") is invalid: beyond the stream's end (" +
|
|
str(self._breakPoint-1) + ")" )
|
|
elif not pos >=0:
|
|
raise Error("pos (" + str(pos) + ") is invalid: less than 0" )
|
|
|
|
def breakPoint(self):
|
|
return self._breakPoint
|
|
|
|
def setBreakPoint(self, pos):
|
|
if pos > self._srcLen:
|
|
raise Error("New breakpoint (" + str(pos) +
|
|
") is invalid: beyond the end of stream's source string (" +
|
|
str(self._srcLen) + ")" )
|
|
elif not pos >= 0:
|
|
raise Error("New breakpoint (" + str(pos) + ") is invalid: less than 0" )
|
|
|
|
self._breakPoint = pos
|
|
|
|
def setBookmark(self, name):
|
|
self._bookmarks[name] = self._pos
|
|
self._posTobookmarkMap[self._pos] = name
|
|
|
|
def hasBookmark(self, name):
|
|
return name in self._bookmarks
|
|
|
|
def gotoBookmark(self, name):
|
|
if not self.hasBookmark(name):
|
|
raise Error("Invalid bookmark (" + name + ") is invalid: does not exist")
|
|
pos = self._bookmarks[name]
|
|
if not self.validPos(pos):
|
|
raise Error("Invalid bookmark (" + name + ', '+
|
|
str(pos) + ") is invalid: pos is out of range" )
|
|
self._pos = pos
|
|
|
|
def atEnd(self):
|
|
return self._pos >= self._breakPoint
|
|
|
|
def atStart(self):
|
|
return self._pos == 0
|
|
|
|
def peek(self, offset=0):
|
|
self.checkPos(self._pos+offset)
|
|
pos = self._pos + offset
|
|
return self._src[pos]
|
|
|
|
def getc(self):
|
|
pos = self._pos
|
|
if self.validPos(pos+1):
|
|
self._pos += 1
|
|
return self._src[pos]
|
|
|
|
def ungetc(self, c=None):
|
|
if not self.atStart():
|
|
raise Error('Already at beginning of stream')
|
|
|
|
self._pos -= 1
|
|
if not c==None:
|
|
self._src[self._pos] = c
|
|
|
|
def advance(self, offset=1):
|
|
self.checkPos(self._pos + offset)
|
|
self._pos += offset
|
|
|
|
def rev(self, offset=1):
|
|
self.checkPos(self._pos - offset)
|
|
self._pos -= offset
|
|
|
|
def read(self, offset):
|
|
self.checkPos(self._pos + offset)
|
|
start = self._pos
|
|
self._pos += offset
|
|
return self._src[start:self._pos]
|
|
|
|
def readTo(self, to, start=None):
|
|
self.checkPos(to)
|
|
if start == None:
|
|
start = self._pos
|
|
self._pos = to
|
|
return self._src[start:to]
|
|
|
|
|
|
def readToEOL(self, start=None, gobble=True):
|
|
EOLmatch = EOLZre.search(self.src(), self.pos())
|
|
if gobble:
|
|
pos = EOLmatch.end()
|
|
else:
|
|
pos = EOLmatch.start()
|
|
return self.readTo(to=pos, start=start)
|
|
|
|
|
|
def find(self, it, pos=None):
|
|
if pos == None:
|
|
pos = self._pos
|
|
return self._src.find(it, pos )
|
|
|
|
def startswith(self, it, pos=None):
|
|
if self.find(it, pos) == self.pos():
|
|
return True
|
|
else:
|
|
return False
|
|
|
|
def rfind(self, it, pos):
|
|
if pos == None:
|
|
pos = self._pos
|
|
return self._src.rfind(it, pos)
|
|
|
|
def findBOL(self, pos=None):
|
|
if pos == None:
|
|
pos = self._pos
|
|
src = self.src()
|
|
return max(src.rfind('\n', 0, pos)+1, src.rfind('\r', 0, pos)+1, 0)
|
|
|
|
def findEOL(self, pos=None, gobble=False):
|
|
if pos == None:
|
|
pos = self._pos
|
|
|
|
match = EOLZre.search(self.src(), pos)
|
|
if gobble:
|
|
return match.end()
|
|
else:
|
|
return match.start()
|
|
|
|
def isLineClearToPos(self, pos=None):
|
|
if pos == None:
|
|
pos = self.pos()
|
|
self.checkPos(pos)
|
|
src = self.src()
|
|
BOL = self.findBOL()
|
|
return BOL == pos or src[BOL:pos].isspace()
|
|
|
|
def matches(self, strOrRE):
|
|
if isinstance(strOrRE, (str, unicode)):
|
|
return self.startswith(strOrRE, pos=self.pos())
|
|
else: # assume an re object
|
|
return strOrRE.match(self.src(), self.pos())
|
|
|
|
def matchWhiteSpace(self, WSchars=' \f\t'):
|
|
return (not self.atEnd()) and self.peek() in WSchars
|
|
|
|
def getWhiteSpace(self, max=None, WSchars=' \f\t'):
|
|
if not self.matchWhiteSpace(WSchars):
|
|
return ''
|
|
start = self.pos()
|
|
breakPoint = self.breakPoint()
|
|
if max is not None:
|
|
breakPoint = min(breakPoint, self.pos()+max)
|
|
while self.pos() < breakPoint:
|
|
self.advance()
|
|
if not self.matchWhiteSpace(WSchars):
|
|
break
|
|
return self.src()[start:self.pos()]
|
|
|
|
def matchNonWhiteSpace(self, WSchars=' \f\t\n\r'):
|
|
return self.atEnd() or not self.peek() in WSchars
|
|
|
|
def getNonWhiteSpace(self, WSchars=' \f\t\n\r'):
|
|
if not self.matchNonWhiteSpace(WSchars):
|
|
return ''
|
|
start = self.pos()
|
|
while self.pos() < self.breakPoint():
|
|
self.advance()
|
|
if not self.matchNonWhiteSpace(WSchars):
|
|
break
|
|
return self.src()[start:self.pos()]
|