Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Visual FA Python port
- # copyright (c) 2025 by honey the codewitch
- # MIT license
- from __future__ import annotations
- import math
- import os
- from subprocess import Popen, PIPE, STDOUT
- from collections.abc import Iterator, Callable
- class _ParseContext:
- def __init__(self, input, tabWidth = 4, position = 0, line = 1, column = 0):
- self.codepoint = -2
- self.position = 0
- self.line = 1
- self.column = 0
- self.tabWidth = 4
- self.captureBuffer = ""
- self.input = input
- self.tabWidth = tabWidth
- self.position = position
- self.line = line
- self.column = column
- def advance(self):
- if self.position >= len(self.input):
- self.codepoint = -1
- else:
- self.codepoint = ord(self.input[self.position])
- match self.codepoint:
- case 10:
- self.line += 1
- self.column = 0
- case 13:
- self.column = 0
- case 9:
- self.column = (((self.column - 1) / self.tabWidth) + 1) * self.tabWidth + 1
- case _:
- if self.codepoint > 31:
- self.column += 1
- self.position += 1
- return self.codepoint
- def capture(self):
- self.captureBuffer += chr(self.codepoint)
- def ensureStarted(self):
- if self.codepoint == -2:
- self.advance()
- @staticmethod
- def _getCodepoints(values):
- result = []
- for v in values:
- result.append(ord(v))
- return result
- def _getErrorMessage(self, expecting):
- sb = ""
- match len(expecting):
- case 0:
- # do nothing
- sb = sb
- case 1:
- sb = ""
- if expecting[0] == -1:
- sb += "end of input"
- else:
- sb += "\""
- sb += chr(expecting[0])
- sb += "\""
- case 2:
- sb = ""
- if expecting[0] == -1:
- sb += "end of input"
- else:
- sb += "\""
- sb += chr(expecting[0])
- sb += "\""
- sb += " or "
- if expecting[1] == -1:
- sb += "end of input"
- else:
- sb += "\""
- sb += chr(expecting[1])
- sb += "\""
- case _: # length > 2
- sb = ""
- if expecting[0] == -1:
- sb += "end of input"
- else:
- sb += "\""
- sb += chr(expecting[0])
- sb += "\""
- l = len(expecting) - 1
- i = 1;
- while i < l:
- sb += ", "
- if -expecting[i] == -1:
- sb += "end of input"
- else:
- sb += "\""
- sb += chr(expecting[1])
- sb += "\""
- i += 1
- sb += ", or "
- if expecting[i] == -1:
- sb += "end of input"
- else:
- sb += "\""
- sb += chr(expecting[i])
- sb += "\""
- if self.codepoint == -1:
- if len(expecting) == 0:
- return "Unexpected end of input"
- return "Unexpected end of input. Expecting " + sb
- if len(expecting) == 0:
- return "Unexpected character \"" + chr(self.codepoint) + "\" in input"
- return "Unexpected character \"" + chr(self.codepoint) + "\" in input. Expecting " + sb
- def expecting(self, values):
- codepoints = _ParseContext._getCodepoints(values)
- if self.codepoint == -2:
- raise Exception(f"The cursor is before the beginning of the input at {self.position}");
- match len(codepoints):
- case 0:
- if self.codepoint == -1:
- raise Exception(f"Unexpected end of input at {self.position}")
- case 1:
- if codepoints[0] != self.codepoint:
- raise Exception(f"{self._getErrorMessage(codepoints)} at {self.position}")
- case _:
- if not (self.codepoint in codepoints):
- raise Exception(f"{self._getErrorMessage(codepoints)} at {self.position}""IsLetter"] = FACharacterClasses.isLetter
- result["IsDigit"] = FACharacterClasses.isDigit
- result["IsLetterOrDigit"] = FACharacterClasses.isLetterOrDigit
- result["IsWhiteSpace"] = FACharacterClasses.isWhiteSpace
- result["alnum"] = FACharacterClasses.alnum
- result["alpha"] = FACharacterClasses.alpha
- result["cntrl"] = FACharacterClasses.cntrl
- result["digit"] = FACharacterClasses.digit
- result["graph"] = FACharacterClasses.graph
- result["ascii"] = FACharacterClasses.ascii
- result["blank"] = FACharacterClasses.blank
- result["lower"] = FACharacterClasses.lower
- result["print"] = FACharacterClasses.print_
- result["punct"] = FACharacterClasses.punct
- result["space"] = FACharacterClasses.space
- result["upper"] = FACharacterClasses.upper
- result["word"] = FACharacterClasses.word
- result["xdigit"] = FACharacterClasses.xdigit
- FACharacterClasses.__known = result
- return FACharacterClasses.__known
- class _ExpEdge:
- def __init__(self):
- self.exp = None
- self.fromState = None
- self.toState = None
- class _FListNode:
- def __init__(self, q, sl):
- self.nextNode = None
- self.prevNode = None
- self.state = q
- self.stateList = sl
- if (sl.count == 0):
- sl.count += 1
- sl.first = self
- sl.last = self
- else:
- sl.count += 1
- sl.last.nextNode = self
- self.prevNode = sl.last
- sl.last = self
- def remove(self):
- self.stateList.count -= 1
- if self.stateList.first is self:
- self.stateList.first = self.nextNode
- else:
- self.prevNode.nextNode = self.nextNode
- if self.stateList.last is self:
- self.stateList.last = self.prevNode
- else:
- self.nextNode.prevNode = self.prevNode
- class _FList:
- def __init__(self):
- self.count = 0
- self.first = None
- self.last = None
- def add(self, q):
- return _FListNode(q, self)
- class _FKeyPair:
- def __init__(self, key, value):
- self.key = key
- self.value = value
- class FADotGraphOptions:
- def __init__(self):
- self.dpi: int = 300
- self.debugString: str | None = None
- self.blockEnds: list[FA] | None = None
- self.debugSourceNfa: FA | None = None
- self.debugShowNfa: bool = False
- self.hideAcceptSymbolIds: bool = False
- self.acceptSymbolNames: list[str] | None = None
- self.vertical: bool = False
- self.statePrefix: str = "q"
- class FAProgress:
- def __init__(self, callback = None):
- self.value: int = 0
- self._callback = callback
- def report(self, value: int) -> None:
- self.value = value
- if self._callback != None:
- self._callback(value)
- class FARange:
- def __init__(self, min : int = -1, max : int = -1):
- self.min = min
- self.max = max
- def __lt__(self,rhs):
- if self.max == rhs.max:
- return self.min < rhs.min
- return self.max < rhs.max
- def intersects(self, rhs: FARange):
- return (rhs.min >= self.min and rhs.min <= self.max) or (rhs.max >= self.min and rhs.max <= self.max)
- @staticmethod
- def toUnpacked(packedRanges : list[int]) -> list[FARange]:
- result = []
- length = len(packedRanges)/2
- i = 0
- while i < length:
- j = i * 2
- result.append(FARange(packedRanges[j], packedRanges[j + 1]))
- i += 1
- return result
- @staticmethod
- def toPacked(pairs : Iterator[FARange]) -> list[int]:
- result = []
- for pair in pairs:
- result.append(pair.min)
- result.append(pair.max)
- return result
- class FATransition:
- def __init__(self, to: FA, min : int = -1, max : int = -1):
- self.to: FA = to
- self.min: int = min
- self.max: int = max
- def isEpsilon(self) -> bool:
- return self.min == -1 or self.max == -1
- def __lt__(self,rhs):
- if self.max == rhs.max:
- return self.min < rhs.min
- return self.max < rhs.max
- class FA:
- def __init__(self, acceptSymbol : int = -1):
- self.acceptSymbol: int = acceptSymbol
- self.transitions: list[FATransition] = []
- self.__minimizationTag: int = -1
- self.__isDeterministic: bool = True
- self.__isCompact: bool = True
- self.id: int = -1
- self.__fromStates: list[FA] = []
- def __str__(self):
- if self.id == -1:
- return "FA"
- return f"q{self.id}"
- def isCompact(self) -> bool:
- return self.__isCompact
- def isDeterministic(self) -> bool:
- return self.__isDeterministic
- def isAccepting(self) -> bool:
- return self.acceptSymbol != -1
- def isFinal(self) -> bool:
- return len(self.transitions) == 0
- def isNeutral(self) -> bool:
- return self.acceptSymbol == -1 and len(self.transitions) == 1 and self.transitions[0].isEpsilon()
- def isTrap(self) -> bool:
- return self.acceptSymbol == -1 and len(self.transitions) == 0
- def addEpsilon(self, to: FA, compact : bool = True) -> None:
- if compact == True:
- i: int = 0
- while i < len(to.transitions):
- fat = to.transitions[i]
- if fat.isEpsilon() == False:
- self.addTransition(FARange(fat.min, fat.max), fat.to, True)
- else:
- self.addEpsilon(fat.to, True)
- i += 1
- if self.acceptSymbol < 0 and to.acceptSymbol > -1:
- self.acceptSymbol = to.acceptSymbol
- else:
- found: bool = False
- i: int = 0
- while i < len(to.transitions):
- fat = to.transitions[i]
- if fat.isEpsilon() == False:
- break
- if fat.to is to:
- found = True
- break
- i += 1
- if found == False:
- self.transitions.insert(i,FATransition(to))
- self.__isCompact = False
- self.__isDeterministic = False
- def addTransition(self, rng: FARange, to: FA, compact: bool = True) -> None:
- if rng.min == -1 and rng.max == -1:
- self.addEpsilon(to, compact)
- return
- if rng.min > rng.max:
- tmp = rng.min
- rng.min = rng.max
- rng.max = tmp
- i: int = 0
- insert: int = -1
- while i < len(self.transitions):
- fat = self.transitions[i]
- if to is fat.to and rng.min == fat.min and rng.max == fat.max:
- return
- if self.__isDeterministic:
- if rng.intersects(FARange(fat.min, fat.max)):
- self.__isDeterministic = False
- if rng.max > fat.max:
- insert = i
- if self.__isDeterministic == False and rng.max < fat.min:
- break
- i += 1
- self.transitions.insert(insert+1,FATransition(to, rng.min, rng.max))
- def clearTransitions(self) -> None:
- self.transitions.clear()
- self.__isDeterministic = True
- self.__isCompact = True
- @staticmethod
- def getFirstAcceptSymbol(states: list[FA]) -> int:
- for state in states:
- if state.acceptSymbol != -1:
- return state.acceptSymbol
- return -1
- def fillClosure(self, result: list[FA] | None = None) -> list[FA]:
- if result is None:
- result = []
- if self in result:
- return result
- result.append(self)
- i = 0
- while i < len(self.transitions):
- trn = self.transitions[i]
- trn.to.fillClosure(result)
- i += 1
- return result
- def fillEpsilonClosure(self, result : list[FA] | None = None) -> list[FA]:
- if result is None:
- result = []
- if self in result:
- return result
- result.append(self)
- if self.__isCompact == True:
- return result
- for trn in self.transitions:
- if trn.isEpsilon() == True:
- if trn.to.__isCompact == True:
- if not (trn.to in result):
- result.append(trn.to)
- else:
- trn.to.fillEpsilonClosure(result)
- else:
- break
- return result
- @staticmethod
- def fillEpsilonClosureAll(states: list[FA], result: list[FA] | None = None) -> list[FA]:
- if result is None:
- result = []
- for state in states:
- state.fillEpsilonClosure(result)
- return result
- def clone(self) -> FA:
- closure = self.fillClosure()
- nclosure: list[FA] = []
- for cfa in closure:
- nfa = FA(cfa.acceptSymbol)
- nfa.__isDeterministic = cfa.__isDeterministic
- nfa.__isCompact = cfa.__isCompact
- nfa.__minimizationTag = cfa.__minimizationTag
- nfa.__fromStates = cfa.__fromStates.copy()
- nfa.id = cfa.id
- nclosure.append(nfa)
- i: int = 0
- while i< len(nclosure):
- cfa = closure[i]
- nfa = nclosure[i]
- j: int = 0
- for fat in cfa.transitions:
- nfa.transitions.append(FATransition(nclosure[closure.index(fat.to)], fat.min, fat.max))
- i += 1
- return nclosure[0]
- def totalize(self, closure: list[FA] | None = None) -> None:
- if closure is None:
- closure = self.fillClosure()
- s = FA()
- s.transitions.append(FATransition(s, 0, 0x10ffff))
- for p in closure:
- maxi = 0
- sortedTrans = sorted(p.transitions)
- for t in sortedTrans:
- if t.isEpsilon() == False:
- if t.min > maxi:
- p.transitions.append(FATransition(s, maxi, t.min - 1))
- if t.max + 1 > maxi:
- maxi = t.max + 1
- if maxi <= 0x10ffff:
- p.transitions.append(FATransition(s, maxi, 0x10ffff))
- @staticmethod
- def _determinize(target: FA, progress: FAProgress = None) -> None:
- target.setIds()
- if progress is None:
- progress = FAProgress()
- prog = 0
- id = 0
- progress.report(0)
- closure = target.fillClosure()
- p = set()
- for ffa in closure:
- p.add(0)
- for trn in ffa.transitions:
- if trn.isEpsilon() == False:
- p.add(trn.min)
- if trn.max < 0x10ffff:
- p.add(trn.max + 1)
- points = sorted(p)
- prog += 1
- progress.report(prog)
- sets = dict() # new Dictionary<_KeySet<FA>, _KeySet<FA>>()
- working = [] # new Queue<_KeySet<FA>>();
- dfaMap = dict() # new Dictionary<_KeySet<FA>, FA>()
- epscl = []
- ecs = [] # new List<FA>()
- efcs = None # List<FA>
- epscl = []
- target.fillEpsilonClosure(epscl)
- i = 0
- initial = list()
- while i < len(epscl):
- efa = epscl[i]
- initial.append(efa)
- i += 1
- fsinit = frozenset(initial)
- sets[fsinit]= initial
- working.append(initial)
- result = FA()
- result.__fromStates = list(initial)
- result.id = id
- id += 1
- result.__isDeterministic = True
- for afa in initial:
- if afa.acceptSymbol != -1:
- result.acceptSymbol = afa.acceptSymbol
- break
- prog+=1
- progress.report(prog)
- # powerset/subset construction
- dfaMap[frozenset(initial)]= result
- while len(working) > 0:
- # get the nextNode set
- s = working.pop(0)
- # get the nextNode DFA out of the map
- # of (NFA states)->(dfa state)
- dfa = dfaMap.get(frozenset(s))
- # find the first accepting
- # state if any, and assign
- # it to the new DFA
- for q in s:
- if q.acceptSymbol!=-1:
- dfa.acceptSymbol = q.acceptSymbol
- break
- # for each range in the input alphabet
- i = 0
- while i < len(points):
- pnt = points[i]
- dststates = list()
- for c in s:
- # TODO: Might be able to eliminate the
- # epsilon closure here. Needs testing
- ecs.clear()
- if c.__isCompact == False:
- c.fillEpsilonClosure(ecs)
- else:
- ecs.append(c)
- j = 0
- while j < len(ecs):
- efa = ecs[j]
- # basically if we intersect somewhere on the input alphabet,
- # which we should, then we add the destination state(s) to the set
- k = 0
- while k < len(efa.transitions):
- trns = efa.transitions[k]
- # skip the epsilon closure
- # we don't need it
- if trns.isEpsilon() == True:
- k += 1
- continue
- # TODO: can probably early out here
- # if pnt > trns.Max?
- if trns.min <= pnt and pnt <= trns.max:
- if trns.to.__isCompact == True:
- if not (trns.to in dststates):
- dststates.append(trns.to)
- else:
- if efcs is None:
- efcs = []
- efcs.clear()
- trns.to.fillEpsilonClosure(efcs)
- m = 0
- while m < len(efcs):
- if not (efcs[m] in dststates):
- dststates.append(efcs[m])
- m += 1
- k+= 1
- j +=1
- # is this a new set or an
- # existing one?
- fset = frozenset(dststates)
- if not (fset in sets):
- sets[fset]=dststates
- # add another work item
- working.append(dststates)
- # make a new DFA state
- newfa = FA()
- newfa.__fromStates = list(dststates)
- newfa.id = id
- id += 1
- dfaMap[fset]= newfa
- dst = dfaMap.get(fset)
- # find the first and last range to insert
- first = pnt
- last = -1
- if i + 1 < len(points):
- last = (points[i + 1] - 1)
- else:
- last = 0x10ffff
- # this should already be in sorted order
- # otherwise we'd use addTransition()
- dfa.transitions.append(FATransition(dst, first, last))
- prog+=1
- progress.report(prog)
- i += 1
- prog+=1
- progress.report(prog)
- # remove dead transitions (destinations with no accept)
- for ffa in result.fillClosure():
- itrns = list(ffa.transitions)
- ffa.transitions.clear()
- for trns in itrns:
- if FA.getFirstAcceptSymbol(trns.to.fillClosure()) != -1:
- ffa.transitions.append(trns)
- prog += 1
- progress.report(prog)
- prog += 1
- progress.report(prog)
- return result
- def toDfa(self, progress: FAProgress | None = None) -> FA:
- return self._determinize(self, progress)
- def _step(self, input: int) -> FA | None:
- ic = len(self.transitions)
- i = 0
- while i < ic:
- t = self.transitions[i]
- if input >= t.min and input <= t.max:
- return t.to
- i += 1
- return None
- @staticmethod
- def _minimize(a : FA, progress: FAProgress | None) -> FA:
- prog = 0
- if progress is None:
- progress = FAProgress()
- progress.report(prog)
- a = a.toDfa(progress)
- tr = a.transitions
- if len(tr) == 1:
- t = tr[0]
- if t.to is a and t.min == 0 and t.max == 0x10ffff:
- return a
- a.totalize()
- prog += 1
- progress.report(prog)
- cl = a.fillClosure()
- states = []
- mtag = 0
- for q in cl:
- q.__minimizationTag = mtag
- states.append(q)
- mtag += 1
- pp = []
- ic = len(cl)
- i = 0
- while i < ic:
- ffa = cl[i]
- pp.append(0)
- for t in ffa.transitions:
- pp.append(t.min)
- if t.max < 0x10ffff:
- pp.append(t.max + 1)
- i += 1
- sigma = list(pp)
- sigma.sort()
- # initialize the data structures
- reverse = []
- for s in states:
- arr = [None] * len(sigma)
- reverse.append(arr)
- prog += 1
- progress.report(prog)
- reverseNonempty = []
- i = 0
- while i < len(states):
- arr = [False]*len(sigma)
- reverseNonempty.append(arr)
- i += 1
- partition = [None]*len(states)
- prog += 1
- progress.report(prog)
- block = [None]*len(states)
- active = []
- i = 0
- while i < len(states):
- arr = [None]*len(sigma)
- active.append(arr)
- i += 1
- active2 = []
- i = 0
- while i < len(states):
- arr = [None]*len(sigma)
- active2.append(arr)
- i += 1
- pending = []
- pending2 = []
- i = 0
- while i < len(sigma):
- arr = [False]*len(states)
- pending2.append(arr)
- i+= 1
- split = []
- split2 = [False]*len(states)
- refine = []
- refine2 = [False]*len(states)
- splitBlock = [None] * len(states)
- prog += 1
- progress.report(prog)
- q = 0
- while q < len(states):
- splitBlock[q] = []
- partition[q] = []
- x = 0
- while x < len(sigma):
- reverse[q][x] = []
- active[q][x]= _FList()
- x += 1
- q += 1
- # Find the initial partition and reverse edges
- for qq in states:
- j = 0
- if qq.acceptSymbol == -1:
- j = 1
- partition[j].append(qq)
- block[qq.__minimizationTag] = j
- x = 0
- while x < len(sigma):
- y = sigma[x]
- p = qq._step(y)
- pn = p.__minimizationTag
- if reverse[pn] != None and reverse[pn][x] != None:
- reverse[pn][x].append(qq)
- reverseNonempty[pn][x] = True
- x += 1
- prog += 1
- progress.report(prog)
- # initialize the active sets
- j = 0
- while j <= 1:
- x = 0
- while x < len(sigma):
- part = partition[j]
- for qq in part:
- if reverseNonempty[qq.__minimizationTag][x] == True:
- active2[qq.__minimizationTag][x] = active[j][x].add(qq)
- x += 1
- prog += 1
- progress.report(prog)
- j += 1
- # initialize pending
- x = 0
- while x < len(sigma):
- a0 = active[0][x].count
- a1 = active[1][x].count
- j = 1
- if a0 <= a1:
- j = 0
- pending.append(_FKeyPair(j, x))
- pending2[x][j] = True
- x += 1
- # process pending until fixed point
- k = 2
- while len(pending) > 0:
- ip = pending.pop(0)
- p = ip.key
- x = ip.value
- pending2[x][p] = False
- m = active[p][x].first
- while m != None:
- for s in reverse[m.state.__minimizationTag][x]:
- if split2[s.__minimizationTag] == False:
- split2[s.__minimizationTag] = True
- split.append(s)
- j = block[s.__minimizationTag]
- splitBlock[j].append(s)
- if refine2[j] != True:
- refine2[j] = True
- refine.append(j)
- m = m.nextNode
- prog += 1
- progress.report(prog)
- # refine blocks
- for j in refine:
- if len(splitBlock[j]) < len(partition[j]):
- b1 = partition[j]
- b2 = partition[k]
- e = splitBlock[j]
- for s in e:
- b1.pop(b1.index(s)) # b1.splice(b1.indexOf(s), 1);
- b2.append(s)
- block[s.__minimizationTag] = k
- c = 0
- while c < len(sigma):
- sn = active2[s.__minimizationTag][c]
- if sn != None and sn.stateList is active[j][c]:
- sn.remove()
- active2[s.__minimizationTag][c] = active[k][c].add(s)
- c += 1
- # update pending
- c = 0
- while c < len(sigma):
- aj = active[j][c].count
- ak = active[k][c].count
- if pending2[c][j]==False and 0 < aj and aj <= ak:
- pending2[c][j] = True
- pending.append(_FKeyPair(j, c))
- else:
- pending2[c][k] = True
- pending.append(_FKeyPair(k, c))
- c += 1
- k+= 1
- sbj = splitBlock[j]
- for s in sbj:
- split2[s.__minimizationTag] = False
- refine2[j] = False
- splitBlock[j].clear()
- prog += 1
- progress.report(prog)
- split.clear()
- refine.clear()
- prog += 1
- progress.report(prog)
- # make a new state for each equiv class, set initial state
- newstates = []
- n = 0
- while n < k:
- s = FA()
- s.__isDeterministic = False
- newstates.append(s)
- pn = partition[n]
- for q in pn:
- if q is a:
- a = s
- s.id = q.id
- s.acceptSymbol = q.acceptSymbol
- s.__minimizationTag = q.__minimizationTag
- q.__minimizationTag = n
- prog += 1
- progress.report(prog)
- n += 1
- # build transitions and set acceptance
- for s in newstates:
- st = states[s.__minimizationTag]
- s.acceptSymbol = st.acceptSymbol
- for t in st.transitions:
- s.transitions.append(FATransition(newstates[t.to.__minimizationTag], t.min, t.max))
- prog += 1
- progress.report(prog)
- # remove dead transitions
- for ffa in a.fillClosure():
- itrns = list(ffa.transitions)
- ffa.transitions.clear()
- for trns in itrns:
- if -1 != FA.getFirstAcceptSymbol(trns.to.fillClosure()):
- ffa.transitions.append(trns)
- return a
- def toMinimizedDfa(self, progress: FAProgress | None = None) -> FA:
- return FA._minimize(self, progress)
- @staticmethod
- def _concat(lhs: FA, rhs: FA, compact: bool) -> FA:
- acc = filter(lambda value: value.isAccepting(),lhs.fillClosure())
- for afa in acc:
- afa.acceptSymbol = -1
- afa.addEpsilon(rhs, compact)
- def toCompact(self) -> FA:
- result = self.clone()
- closure = result.fillClosure()
- done = False
- while done == False:
- done = True
- i = 0
- while i < len(closure):
- fa = closure[i]
- j = 0
- while j < len(fa.transitions):
- fat = fa.transitions[j]
- if fat.isEpsilon():
- del fa.transitions[j]
- j -=1
- fa.addEpsilon(fat.to)
- done = False
- break
- j += 1
- i += 1
- if done == False:
- closure = closure[0].fillClosure()
- break
- fa.__isCompact = True
- return result
- def setIds(self) -> None:
- i = 0
- for fa in self.fillClosure():
- fa.id = i
- i += 1
- @staticmethod
- def literal(codepoints: Iterator[int], accept: int = 0, compact: bool = True) -> FA:
- result = FA()
- current: FA = result
- for cp in codepoints:
- current.acceptSymbol = -1
- newFa = FA()
- newFa.acceptSymbol = accept
- current.addTransition(FARange(cp, cp), newFa, compact)
- current = newFa
- return result
- @staticmethod
- def charset(ranges: Iterator[FARange], accept: int = 0, compact: bool = True) -> FA:
- result = FA()
- final = FA(accept)
- l = sorted(ranges)
- for rng in l:
- result.addTransition(rng,final,compact)
- return result
- @staticmethod
- def concat(exprs: Iterator[FA], accept: int = 0, compact: bool = True) -> FA:
- result = None
- left = None
- right = None
- for expr in exprs:
- nval = expr.clone()
- if left is None:
- if result is None:
- result = nval
- left = nval
- continue
- if right is None:
- right = nval
- nval = right.clone()
- FA._concat(left, nval, compact)
- target = result
- if right != None:
- target = right
- cl = target.fillClosure()
- for cfa in cl:
- if cfa.acceptSymbol != -1:
- cfa.acceptSymbol = accept
- if result is None:
- return FA(accept)
- return result
- @staticmethod
- def union(exprs: Iterator[FA], accept: int = 0, compact: bool = True) -> FA:
- result = FA()
- final = FA()
- final.acceptSymbol = accept
- for expr in exprs:
- nfa = expr.clone()
- nacc = nfa.fillClosure()
- for afa in nacc:
- if afa.acceptSymbol != -1:
- afa.acceptSymbol = -1
- afa.addEpsilon(final, compact)
- result.addEpsilon(nfa, compact)
- return result
- @staticmethod
- def optional(expr: Iterator[FA], accept: int = 0, compact: bool = True) -> FA:
- result = expr.clone()
- acc = result.fillClosure()
- for afa in acc:
- if afa.acceptSymbol != -1:
- afa.acceptSymbol = accept
- result.addEpsilon(afa, compact)
- return result
- @staticmethod
- def repeat(expr: FA, minOccurs: int = 0, maxOccurs: int = 0, accept: int = 0, compact: bool = True) -> FA:
- expr = expr.clone()
- if minOccurs > 0 and maxOccurs > 0 and minOccurs > maxOccurs:
- raise Exception("Minimum is greater than maximum")
- result = None
- match minOccurs:
- case -1|0:
- match maxOccurs:
- case -1|0:
- result = FA()
- result.acceptSymbol = accept
- result.addEpsilon(expr, compact)
- for afa in expr.fillClosure():
- if afa.acceptSymbol != -1:
- afa.addEpsilon(result, compact)
- return result
- case 1:
- result = FA.optional(expr, accept, compact)
- return result
- case _:
- l = []
- expr = FA.optional(expr, accept, compact)
- l.append(expr)
- i = 1
- while i < maxOccurs:
- l.append(expr.clone())
- i += 1
- result = FA.concat(l, accept, compact)
- return result
- case 1:
- match maxOccurs:
- case -1|0:
- result = FA.concat([expr, FA.repeat(expr, 0, 0, accept, compact)], accept, compact)
- return result
- case 1:
- return expr
- case _:
- result = FA.concat([expr, FA.repeat(expr, 0, maxOccurs - 1, accept, compact)], accept, compact)
- return result
- case _:
- match maxOccurs:
- case -1|0:
- result = FA.concat([FA.repeat(expr, minOccurs, minOccurs, accept, compact), FA.repeat(expr, 0, 0, accept, compact)], accept, compact)
- return result
- case 1:
- raise Exception("Should never get here")
- case _:
- if minOccurs == maxOccurs:
- l = []
- expr = FA.optional(expr, accept, compact)
- l.append(expr)
- i = 1
- while i < maxOccurs:
- l.append(expr.clone())
- i += 1
- result = FA.concat(l, accept, compact)
- return result
- result = FA.concat([FA.repeat(expr, minOccurs, minOccurs, accept, compact), FA.repeat(FA.optional(expr, accept, compact), maxOccurs - minOccurs, maxOccurs - minOccurs, accept, compact)], accept, compact)
- return result
- def fillInputTransitionRangesGroupedByState(self, includeEpsilonTransitions: bool = False, result: dict[FA,list[FARange]] = None) -> dict[FA,list[FARange]]:
- if result is None:
- result = dict()
- for fat in self.transitions:
- if includeEpsilonTransitions or fat.isEpsilon()==False:
- res = result.get(fat.to,None)
- if res is None:
- ndata = [FARange(fat.min, fat.max)]
- result[fat.to]= ndata
- else:
- res.append(FARange(fat.min, fat.max))
- for values in result.values():
- values.sort()
- return result
- @staticmethod
- def toUtf32(text: str) -> Iterator[int]:
- for s in text:
- yield ord(s)
- def toArray(self) -> list[int]:
- fa = self
- result: list[int] = []
- closure = fa.fillClosure()
- stateIndices: list[int] = []
- i: int = 0
- while i < len(closure):
- cfa = closure[i]
- stateIndices.append(len(result))
- result.append(cfa.acceptSymbol)
- itrgp = cfa.fillInputTransitionRangesGroupedByState(True)
- result.append(len(itrgp))
- for itr in itrgp.items():
- result.append(closure.index(itr[0]))
- result.append(len(itr[1]))
- for pack in FARange.toPacked(itr[1]):
- result.append(pack)
- i += 1
- state: int = 0
- while state < len(result):
- state += 1
- tlen = result[state]
- state += 1
- i = 0
- while i < tlen:
- result[state] = stateIndices[result[state]]
- state += 1
- prlen = result[state]
- state += 1
- state += prlen * 2
- i += 1
- return result
- @staticmethod
- def fromArray(arr: list[int]) -> FA:
- if len(arr)==0:
- return FA()
- si = 0
- states = dict()
- while si < len(arr):
- newFa = FA()
- states[si]= newFa
- newFa.acceptSymbol = arr[si]
- si += 1
- tlen = arr[si]
- si += 1
- i = 0
- while i < tlen:
- si += 1
- prlen = arr[si]
- si += 1
- si += prlen * 2
- i += 1
- si = 0
- while si < len(arr):
- state = states.get(si)
- si += 1
- tlen = arr[si]
- si += 1
- i = 0
- while i < tlen:
- tto = arr[si]
- si += 1
- prlen = arr[si]
- si += 1
- to = states.get(tto)
- j = 0
- while j < prlen:
- min = arr[si]
- si += 1
- max = arr[si]
- si += 1
- state.addTransition(FARange(min, max), to)
- j += 1
- i += 1
- return states.get(0)
- def toLexer(tokens: Iterator[FA], makeDfa: bool = True, compact: bool = True, progress: FAProgress | None = None) -> FA:
- if makeDfa == True:
- i: int = 0
- while i < len(tokens):
- tokens[i] = tokens[i].toMinimizedDfa(progress)
- i += 1
- result = FA()
- if makeDfa == True:
- for token in tokens:
- result.addEpsilon(token.toMinimizedDfa(progress), True)
- else:
- for token in tokens:
- result.addEpsilon(token, compact)
- if makeDfa == True:
- return result.toDfa(progress)
- else:
- return result
- @staticmethod
- def fillMove(states: list[FA], codepoint: int, result: list[FA] | None = None) -> list[FA]:
- if result is None:
- result = []
- for state in states:
- for fat in state.transitions:
- # epsilon dsts should already be in states:
- if fat.isEpsilon():
- continue
- if codepoint < fat.min:
- break
- if codepoint <= fat.max:
- fat.to.fillEpsilonClosure(result)
- return result
- @staticmethod
- def __normalizeSortedRangeList(pairs):
- i = 1
- while i < len(pairs):
- if pairs[i - 1].max + 1 >= pairs[i].min:
- nr = FARange(pairs[i - 1].min, pairs[i].max)
- pairs[i - 1] = nr
- del pairs[i]
- i -= 1 # compensated for by i += 1 in for loop
- i += 1
- i = 1
- @staticmethod
- def __fromHexChar(h):
- if ord(":") > h and ord("/") < h:
- return h - ord("0")
- if ord("G") > h and ord("@") < h:
- return h - ord("7") # 'A'-10
- if ord("g") > h and ord("`") < h:
- return h - ord("W") # 'a'-10
- raise Exception("The value was not hex.")
- @staticmethod
- def __isHexChar(h):
- if ord(":") > h and ord("/") < h:
- return True
- if ord("G") > h and ord("@") < h:
- return True
- if ord("g") > h and ord("`") < h:
- return True
- return False
- @staticmethod
- def __parseModifier(expr, pc, accept, compact):
- if pc.codepoint == -1:
- return expr
- position = pc.position
- match chr(pc.codepoint):
- case "*":
- expr = FA.repeat(expr, 0, 0, accept, compact)
- pc.advance()
- case "+":
- expr = FA.repeat(expr, 1, 0, accept, compact)
- pc.advance()
- case "?":
- expr = FA.optional(expr, accept, compact)
- pc.advance()
- case "{":
- pc.advance()
- pc.trySkipWhiteSpace()
- pc.expecting(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ',', '}'])
- min = -1
- max = -1
- if ord(",") != pc.codepoint and ord("}") != pc.codepoint:
- l = len(pc.captureBuffer)
- if pc.tryReadDigits() == False:
- pc.expecting(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
- min = int(pc.captureBuffer[l:])
- pc.trySkipWhiteSpace()
- if ord(",") == pc.codepoint:
- pc.advance()
- pc.trySkipWhiteSpace()
- pc.expecting(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '}'])
- if ord("}") != pc.codepoint:
- l = len(pc.captureBuffer)
- pc.tryReadDigits()
- max = int(pc.captureBuffer[l:])
- pc.trySkipWhiteSpace()
- else:
- max = min
- pc.expecting(['}'])
- pc.advance()
- expr = FA.repeat(expr, min, max, accept, compact)
- return expr
- @staticmethod
- def __parseEscapePart(pc):
- if -1 == pc.codepoint:
- return -1
- match chr(pc.codepoint):
- case "f":
- pc.advance()
- return ord("\f")
- case "v":
- pc.advance()
- return ord("\v")
- case "t":
- pc.advance()
- return ord("\t")
- case "n":
- pc.advance()
- return ord("\n")
- case "r":
- pc.advance()
- return ord("\r")
- case "x":
- if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
- return ord("x")
- b = FA.__fromHexChar(pc.codepoint)
- if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
- return b
- b <<= 4
- b |= FA.__fromHexChar(pc.codepoint)
- if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
- return b
- b <<= 4
- b |= FA.__fromHexChar(pc.codepoint)
- if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
- return b
- b <<= 4
- b |= FA.__fromHexChar(pc.codepoint)
- return b
- case "u":
- if -1 == pc.advance():
- return ord("u")
- u = FA.__fromHexChar(pc.codepoint)
- u <<= 4
- if -1 == pc.advance():
- return u
- u |= FA.__fromHexChar(pc.codepoint)
- u <<= 4
- if -1 == pc.advance():
- return u
- u |= FA.__fromHexChar(pc.codepoint)
- u <<= 4
- if -1 == pc.advance():
- return u
- u |= FA.__fromHexChar(pc.codepoint)
- return u
- case _:
- i = pc.codepoint
- pc.advance()
- return i
- @staticmethod
- def __parseRangeEscapePart(pc):
- if pc.codepoint == -1:
- return -1
- match chr(pc.codepoint):
- case "0":
- pc.advance()
- return ord("\0")
- case "f":
- pc.advance()
- return ord("\f")
- case "v":
- pc.advance()
- return ord("\v")
- case "t":
- pc.advance()
- return ord("\t")
- case "n":
- pc.advance()
- return ord("\n")
- case "r":
- pc.advance()
- return ord("\r")
- case "x":
- if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
- return ord("x")
- b = FA.__fromHexChar(pc.codepoint)
- if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
- return b
- b <<= 4
- b |= FA.__fromHexChar(pc.codepoint)
- if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
- return b
- b <<= 4
- b |= FA.__fromHexChar(pc.codepoint)
- if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
- return b
- b <<= 4
- b |= FA.__fromHexChar(pc.codepoint)
- return b
- case "u":
- if -1 == pc.advance():
- return ord("u")
- u = FA.__fromHexChar(pc.codepoint)
- u <<= 4
- if -1 == pc.advance():
- return u
- u |= FA.__fromHexChar(pc.codepoint)
- u <<= 4
- if -1 == pc.advance():
- return u
- u |= FA.__fromHexChar(pc.codepoint)
- u <<= 4
- if -1 == pc.advance():
- return u
- u |= FA.__fromHexChar(pc.codepoint)
- return u
- case _:
- i = pc.codepoint
- pc.advance()
- return i
- @staticmethod
- def __parseSet(pc):
- result = []
- pc.ensureStarted()
- pc.expecting(['['])
- pc.advance()
- pc.expecting([])
- firstRead = True
- firstChar = 0
- readFirstChar = False
- wantRange = False
- isNot = False
- if ord("^") == pc.codepoint:
- isNot = True
- pc.advance()
- pc.expecting([])
- while -1 != pc.codepoint and (firstRead == True or ord("]") != pc.codepoint):
- if wantRange == False:
- # can be a single char,
- # a range
- # or a named character class
- if ord("[") == pc.codepoint: # named char class
- epos = pc.position
- pc.advance()
- pc.expecting([])
- if ord(":") != pc.codepoint:
- firstChar = ord("[")
- readFirstChar = True
- else:
- firstRead = False
- pc.advance()
- pc.expecting([])
- ll = len(pc.captureBuffer)
- if pc.tryReadUntil(ord(":"), False) == False:
- raise Exception(f"Expecting character class at {pc.position}")
- pc.expecting([':'])
- pc.advance()
- pc.expecting([']'])
- pc.advance()
- cls = pc.captureBuffer[ll:]
- ranges = FACharacterClasses.known().get(cls,None);
- if ranges==None:
- raise Exception(f"Unknown character class \"{cls}\" specified at {epos}")
- for rng in FARange.toUnpacked(ranges):
- result.append(rng)
- readFirstChar = False
- wantRange = False
- firstRead = False
- continue
- if readFirstChar == False:
- if ord("\\") == pc.codepoint:
- pc.advance()
- firstChar = FA.__parseRangeEscapePart(pc)
- else:
- firstChar = pc.codepoint
- pc.advance()
- pc.expecting([])
- readFirstChar = True
- else:
- if ord("-") == pc.codepoint:
- pc.advance()
- pc.expecting([])
- wantRange = True
- else:
- result.append(FARange(firstChar, firstChar))
- readFirstChar = False
- firstRead = False
- else:
- if ord("\\") != pc.codepoint:
- ch = pc.codepoint
- pc.advance()
- pc.expecting([])
- result.append(FARange(firstChar, ch))
- else:
- min = firstChar
- pc.advance()
- result.append(FARange(min, FA.__parseRangeEscapePart(pc)))
- wantRange = False
- readFirstChar = False
- if readFirstChar == True:
- result.append(FARange(firstChar, firstChar))
- if wantRange == True:
- result.append(FARange(ord("-"),ord("-")))
- pc.expecting(["]"])
- pc.advance()
- return (isNot, result)
- @staticmethod
- def __invertRanges(ranges: list[FARange]):
- if ranges is None:
- return
- last = 0x10ffff
- e = iter(ranges)
- n = next(e,None)
- if n is None:
- yield FARange(0,0x10ffff)
- return
- if n.min > 0:
- yield FARange(0,n.min - 1)
- last = n.max
- if 0x10ffff <= last:
- return
- elif n.min == 0:
- last = n.max
- if 0x10ffff <= last:
- return
- n = next(e, None)
- while not (n is None):
- if (0x10ffff <= last):
- return
- if last + 1 < n.min:
- yield FARange(last+1,n.min-1)
- last = n.max
- n = next(e, None)
- if 0x10ffff > last:
- yield FARange(last+1,0x10ffff)
- @staticmethod
- def __toNotRanges(ranges):
- if ranges is None:
- return
- # expects ranges to be normalized
- last = 0x10ffff
- if hasattr(ranges,'__iter__'):
- e = iter(ranges)
- else:
- e = ranges
- cur = next(e, None)
- if cur is None:
- yield FARange(0x0, 0x10ffff)
- return
- if cur.min > 0:
- yield FARange(0, cur.min - 1)
- last = cur.max
- if 0x10ffff <= last:
- return
- elif cur.min == 0:
- last = cur.max
- if 0x10ffff <= last:
- return
- while not (cur is None):
- if 0x10ffff <= last:
- return
- if last + 1 < cur.min:
- yield FARange(last + 1, (cur.min - 1))
- last = cur.max
- cur = next(e, None)
- if 0x10ffff >= last:
- yield FARange((last + 1), 0x10ffff)
- @staticmethod
- def __parse(pc, accept = 0, compact = True):
- result = None
- nextExpr = None
- ich = 0
- pc.ensureStarted()
- while pc.codepoint != -1:
- match chr(pc.codepoint):
- #case -1:
- # if result is None:
- # # empty string
- # result = FA(accept)
- # return result
- case ".":
- dot = FA.charset([FARange(0, 0x10ffff)], accept, compact)
- pc.advance()
- dot = FA.__parseModifier(dot, pc, accept, compact)
- if result is None:
- result = dot
- else:
- result = FA.concat([result, dot], accept, compact)
- case "\\":
- pc.advance()
- pc.expecting([])
- isNot = False
- ch = chr(pc.codepoint)
- match ch:
- case ch if ch in ["P", "p"]:
- if pc.codepoint==ord("P"):
- isNot = True
- pc.advance()
- pc.expecting(['{'])
- uc = ""
- while -1 != pc.advance() and ord("}")!=pc.codepoint:
- uc += chr(pc.codepoint)
- pc.expecting(['}'])
- pc.advance()
- uci = 0
- match uc:
- case "Pe":
- uci = 21
- case "Pc":
- uci = 19
- case "Cc":
- uci = 14
- case "Sc":
- uci = 26
- case "Pd":
- uci = 19
- case "Nd":
- uci = 8
- case "Me":
- uci = 7
- case "Pf":
- uci = 23
- case "Cf":
- uci = 15
- case "Pi":
- uci = 22
- case "Nl":
- uci = 9
- case "Zl":
- uci = 12
- case "Ll":
- uci = 1
- case "Sm":
- uci = 25
- case "Lm":
- uci = 3
- case "Sk":
- uci = 27
- case "Mn":
- uci = 5
- case "Ps":
- uci = 20
- case "Lo":
- uci = 4
- case "Cn":
- uci = 29
- case "No":
- uci = 10
- case "Po":
- uci = 24
- case "So":
- uci = 28
- case "Zp":
- uci = 13
- case "Co":
- uci = 17
- case "Zs":
- uci = 11
- case "Mc":
- uci = 6
- case "Cs":
- uci = 16
- case "Lt":
- uci = 2
- case "Lu":
- uci = 0
- if isNot:
- nextExpr = FA.charset(FARange.toUnpacked(FACharacterClasses.unicodeCategories[uci]), accept, compact)
- else:
- nextExpr = FA.charset(FARange.toUnpacked(FACharacterClasses.notUnicodeCategories[uci]), accept, compact)
- case "d":
- nextExpr = FA.charset(FARange.toUnpacked(FACharacterClasses.digit), accept, compact)
- pc.advance()
- case "D":
- nextExpr = FA.charset(FA.__toNotRanges(FARange.toUnpacked(FACharacterClasses.digit)), accept, compact)
- pc.advance()
- case "s":
- nextExpr = FA.charset(FARange.toUnpacked(FACharacterClasses.space), accept, compact)
- pc.advance()
- case "S":
- nextExpr = FA.charset(FA.__toNotRanges(FARange.toUnpacked(FACharacterClasses.space)), accept, compact)
- pc.advance()
- case "w":
- nextExpr = FA.charset(FARange.toUnpacked(FACharacterClasses.word), accept, compact);
- pc.advance()
- case "W":
- nextExpr = FA.charset(FA.__toNotRanges(FARange.toUnpacked(FACharacterClasses.word)), accept, compact);
- pc.advance()
- case _:
- ich = FA.__parseEscapePart(pc)
- if -1 != ich:
- nextExpr = FA.literal([ich], accept, compact)
- else:
- pc.expecting([]) # throw an error
- nextExpr = FA.__parseModifier(nextExpr, pc, accept, compact)
- if not (result is None):
- result = FA.concat([result, nextExpr], accept, compact)
- else:
- result = nextExpr
- case ")":
- return result
- case "(":
- pc.advance()
- if pc.codepoint == ord("?"):
- pc.advance()
- pc.expecting([':'])
- pc.advance()
- pc.expecting([])
- nextExpr = FA.__parse(pc, accept, compact)
- pc.expecting([')'])
- if -1 == pc.advance():
- if result is None:
- return nextExpr
- else:
- return FA.concat([result, nextExpr], accept, compact)
- nextExpr = FA.__parseModifier(nextExpr, pc, accept, compact)
- if result is None:
- result = nextExpr
- else:
- result = FA.concat([result, nextExpr], accept, compact)
- case "|":
- if -1 != pc.advance():
- nextExpr = FA.__parse(pc, accept, compact)
- result = FA.union([result, nextExpr], accept, compact)
- else:
- result = FA.optional(result, accept, compact)
- case "[":
- seti = FA.__parseSet(pc)
- sortset = seti[1]
- sortset.sort()
- FA.__normalizeSortedRangeList(sortset)
- if seti[0] == True:
- sortset = list(FA.__toNotRanges(sortset))
- nextExpr = FA.charset(sortset, accept)
- nextExpr = FA.__parseModifier(nextExpr, pc, accept, compact)
- if result is None:
- result = nextExpr
- else:
- result = FA.concat([result, nextExpr], accept, compact)
- case _:
- ich = pc.codepoint
- nextExpr = FA.literal([ich], accept, compact)
- pc.advance()
- nextExpr = FA.__parseModifier(nextExpr, pc, accept, compact)
- if result is None:
- result = nextExpr
- else:
- result = FA.concat([result, nextExpr], accept, compact)
- return result
- @staticmethod
- def parse(expression: str, accept: int = 0, compact: bool = True) -> FA:
- pc = _ParseContext(expression)
- result = FA.__parse(pc, accept, compact)
- if result is None:
- result = FA(accept)
- return result
- @staticmethod
- def acceptingFilter(state: FA) -> bool:
- return state.acceptSymbol != -1
- @staticmethod
- def finalFilter(state: FA) -> bool:
- return state.isFinal()
- @staticmethod
- def neutralFilter(state: FA) -> bool:
- return state.isNeutral()
- @staticmethod
- def trapFilter(state: FA) -> bool:
- return state.isTrap()
- def fillFind(self, predicate: Callable[[FA], bool], result: list[FA] = None) -> list[FA]:
- if result is None:
- result = []
- if self in result:
- return result
- if predicate(self) == True:
- result.append(self)
- for trn in self.transitions:
- trn.to.fillFind(predicate, result)
- return result
- def findFirst(self, predicate: Callable[[FA], bool]) -> FA | None:
- if predicate(self) == True:
- return self
- for trn in self.transitions:
- result = trn.to.findFirst(predicate)
- if not (result is None):
- return result
- return None
- @staticmethod
- def __appendCodepointTo(codepoint, result):
- ch = chr(codepoint)
- match ch:
- case '.'|'['|']'|'^'|'-'|'+'|'?'|'('|')'|'\\':
- result += '\\'
- result += ch
- return;
- case '\t':
- result += "\\t"
- case '\n':
- result += "\\n"
- case '\r':
- result += "\\r"
- case '\0':
- result +="\\0"
- case '\f':
- result += "\\f"
- case '\v':
- result += "\\v"
- case '\b':
- result += "\\b"
- case _:
- if codepoint<32 or codepoint>127:
- if len(ch) == 1:
- result += "\\u"
- result += format(codepoint,'04x')
- else:
- result += "\\U"
- result += format(codepoint,'08x')
- else:
- result += ch
- return result
- @staticmethod
- def __appendRangeCodepointTo(codepoint, result):
- ch = chr(codepoint)
- match ch:
- case '.'|'['|']'|'^'|'-'|'('|')'|'{'|'}'|'\\':
- result += '\\'
- result += ch
- case '\t':
- result += "\\t"
- case '\n':
- result += "\\n"
- case '\r':
- result += "\\r"
- case '\0':
- result +="\\0"
- case '\f':
- result += "\\f"
- case '\v':
- result += "\\v"
- case '\b':
- result += "\\b"
- case _:
- if codepoint<32 or codepoint>127:
- if len(ch) == 1:
- result += "\\u"
- result += format(codepoint,'04x')
- else:
- result += "\\U"
- result += format(codepoint,'08x')
- else:
- result += ch
- return result
- @staticmethod
- def __appendRangeTo(ranges, index, result):
- first = ranges[index].min
- last = ranges[index].max
- if first == 0 and last == 1114111:
- return result + '.'
- result = FA.__appendRangeCodepointTo(first, result)
- if last == first:
- return result
- if last == first + 1: # spit out 1 and 2 length ranges as flat chars
- result = FA.__appendRangeCodepointTo(last, result)
- return result
- elif last == first + 2:
- result = FA.__appendRangeCodepointTo(first+1, result)
- result = FA.__appendRangeCodepointTo(last, result)
- return result
- result +='-'
- result = FA.__appendRangeCodepointTo(last, result)
- return result
- @staticmethod
- def __appendRangesTo(ranges, result):
- i = 0
- while i < len(ranges):
- result = FA.__appendRangeTo(ranges,i,result)
- i+= 1
- return result
- @staticmethod
- def __toExprFillEdgesIn(edges, fa, result):
- for edge in edges:
- if edge.toState is fa:
- result.append(edge)
- @staticmethod
- def __toExprFillEdgesOut(edges, fa, result):
- for edge in edges:
- if edge.fromState is fa:
- result.append(edge)
- @staticmethod
- def __toExpressionFillEdgesOrphanState(edges, fa, result):
- for edge in edges:
- if ((edge.fromState is fa) or (edge.toState is fa)):
- continue
- result.append(edge)
- @staticmethod
- def __toExpressionOrJoin(strings):
- if len(strings) == 0:
- return ""
- if len(strings) == 1:
- return strings[0]
- return "("+"|".join(strings)+")"
- @staticmethod
- def __toExpressionKleeneStar(s, noWrap,result):
- if s is None or len(s) == 0:
- return result
- if noWrap == True or len(s) == 1:
- result += f"{s}*"
- return result
- result += f"({s})*"
- return result
- @staticmethod
- def __toExpression(fa: FA) -> str:
- closure: list[FA] = []
- fsmEdges: list[_ExpEdge] = []
- first: FA = fa
- final: FA = None
- # linearize the state machine
- acc: list[FA] = []
- for f in first.fillClosure():
- if f.acceptSymbol != -1:
- acc.append(f)
- if len(acc) == 1:
- final = acc[0]
- elif len(acc) > 1:
- fa = fa.clone()
- first = fa
- acc.clear()
- for f in fa.fillClosure():
- if f.acceptSymbol != -1:
- acc.append(f)
- final = FA(acc[0].acceptSymbol)
- for a in acc:
- a.addEpsilon(final, False)
- a.acceptSymbol = -1
- first.fillClosure(closure)
- sb = [] # string builder
- # build the machine from the FA
- trnsgrp = dict()
- for cfa in closure:
- trnsgrp.clear()
- for trns in cfa.fillInputTransitionRangesGroupedByState(True,trnsgrp).items():
- sb = ""
- if len(trns[1]) == 1 and trns[1][0].min == trns[1][0].max:
- rng = trns[1][0]
- if rng.min == -1 or rng.max == -1:
- eedge = _ExpEdge()
- eedge.exp = ""
- eedge.fromState = cfa
- eedge.toState = trns[0]
- fsmEdges.append(eedge)
- continue
- sb = FA.__appendCodepointTo(rng.min, sb)
- else:
- sb += "["
- sb = FA.__appendRangesTo(trns[1], sb)
- sb += "]"
- edge = _ExpEdge()
- edge.exp = sb
- edge.fromState = cfa
- edge.toState = trns[0]
- fsmEdges.append(edge)
- tmp = FA()
- tmp.addEpsilon(first, False)
- q0 = first
- first = tmp
- tmp = FA(final.acceptSymbol)
- qLast = final
- final.acceptSymbol = -1
- final.addEpsilon(tmp, False)
- final = tmp
- # add first and final
- newEdge = _ExpEdge()
- newEdge.exp = ""
- newEdge.fromState = first
- newEdge.toState = q0
- fsmEdges.append(newEdge)
- newEdge = _ExpEdge()
- newEdge.exp = ""
- newEdge.fromState = qLast
- newEdge.toState = final
- fsmEdges.append(newEdge)
- closure[0].setIds()
- closure.insert(0, first)
- closure.append(final)
- inEdges: list[_ExpEdge] = []
- outEdges: list[_ExpEdge] = []
- while len(closure) > 2:
- node = closure[1]
- loops = []
- inEdges.clear()
- FA.__toExprFillEdgesIn(fsmEdges, node, inEdges)
- for edge in inEdges:
- if edge.fromState is edge.toState:
- loops.append(edge.exp)
- sb = ""
- sb = FA.__toExpressionKleeneStar(FA.__toExpressionOrJoin( loops),len(loops) > 1, sb)
- middle = sb
- for inEdge in inEdges:
- if inEdge.fromState is inEdge.toState:
- continue
- outEdges.clear()
- FA.__toExprFillEdgesOut(fsmEdges, node, outEdges)
- for outEdge in outEdges:
- if outEdge.fromState is outEdge.toState:
- continue
- expEdge = _ExpEdge()
- expEdge.fromState = inEdge.fromState
- expEdge.toState = outEdge.toState
- sb = ""
- sb += inEdge.exp
- sb += middle
- sb += outEdge.exp
- expEdge.exp = sb
- fsmEdges.append(expEdge)
- # reuse inedges since we're not using it
- inEdges.clear()
- FA.__toExpressionFillEdgesOrphanState(fsmEdges, node,inEdges)
- fsmEdges.clear()
- for edge in inEdges:
- fsmEdges.append(edge)
- closure.remove(node)
- sb = ""
- if len(fsmEdges) == 1:
- return fsmEdges[0].exp
- if len(fsmEdges) > 1:
- sb+="("
- sb+=fsmEdges[0].exp
- i = 1
- while i < len(fsmEdges):
- sb += "|"
- edge = fsmEdges[i]
- sb += edge.exp
- i += 1
- sb += ")"
- return sb
- def toString(self, format: str | None = "") -> str:
- if (format is None) or len(format) == 0:
- return str(self)
- if format == "e":
- return FA.__toExpression(self)
- raise Exception("Invalid format specifier")
- @staticmethod
- def __escapeLabel(label: str):
- if (label is None) or len(label) == 0:
- return label
- result = label.replace("\\", "\\\\")
- result = result.replace("\"", "\\\"")
- result = result.replace("\n", "\\n")
- result = result.replace("\r", "\\r")
- result = result.replace("\0", "\\0")
- result = result.replace("\v", "\\v")
- result = result.replace("\t", "\\t")
- result = result.replace("\f", "\\f")
- return result
- @staticmethod
- def __writeCompoundDotTo(closure: list[FA], writer: list[str], options: FADotGraphOptions | None = None) -> None:
- writer.append("digraph FA {\n")
- vert: bool = True
- if options is None or options.vertical == False:
- writer.append("rankdir=LR\n")
- vert = False
- writer.append("node [shape=circle]\n")
- opt2 = FADotGraphOptions()
- opt2.debugSourceNfa = None
- if options is None:
- options = FADotGraphOptions()
- opt2.statePrefix = options.statePrefix
- opt2.debugString = options.debugString
- opt2.debugShowNfa = False
- opt2.dpi = options.dpi
- opt2.acceptSymbolNames = options.acceptSymbolNames
- opt2.hideAcceptSymbolIds = options.hideAcceptSymbolIds
- opt2.blockEnds = options.blockEnds
- if vert == False:
- FA.__writeDotTo(closure, writer, options, 2)
- FA.__writeDotTo(options.debugSourceNfa.fillClosure(), writer, opt2, 1)
- else:
- FA.__writeDotTo(options.debugSourceNfa.fillClosure(), writer, opt2, 2)
- FA.__writeDotTo(closure, writer, options, 1)
- writer.append("}\n")
- @staticmethod
- def __writeDotTo(closure : list[FA], writer : list[str], options : FADotGraphOptions|None = None, clusterIndex : int = -1) -> None:
- if options is None:
- options = FADotGraphOptions()
- hasBlockEnds = options.debugShowNfa == False and (options.debugString is None) and (not (options.blockEnds is None))
- spfx = "q"
- if not (options.statePrefix is None) and len(options.statePrefix)>0:
- spfx = options.statePrefix
- pfx = ""
- if clusterIndex != -1:
- writer.append(f"subgraph cluster_{clusterIndex} {{\n")
- pfx = f"c{clusterIndex}"
- else:
- writer.append("digraph FA {\n")
- if options.vertical == False:
- writer.append("rankdir=LR\n");
- writer.append("node [shape=circle]\n")
- accepting: list[FA] = []
- finals: list[FA] = []
- neutrals: list[FA] = []
- for ffa in closure:
- if ffa.acceptSymbol != -1:
- accepting.append(ffa)
- elif ffa.isNeutral():
- neutrals.append(ffa)
- elif ffa.isFinal():
- finals.append(ffa)
- fromStates: list[FA] | None = None
- toStates: list[FA] | None = None
- tchar: int = -1
- if not (options.debugString is None):
- for ch in FA.toUtf32(options.debugString):
- if fromStates is None:
- fromStates = closure[0].fillEpsilonClosure()
- else:
- fromStates = toStates
- tchar = ch
- toStates = FA.fillMove(fromStates, ch)
- if len(toStates) == 0:
- break
- if fromStates is None:
- fromStates = closure[0].fillEpsilonClosure(fromStates)
- if not (toStates is None):
- toStates = FA.fillEpsilonClosureAll(toStates)
- else:
- toStates = fromStates
- i = 0
- for ffa in closure:
- isfrom = (not (fromStates is None)) and ffa in FA.fillEpsilonClosureAll(fromStates)
- rngGrps = ffa.fillInputTransitionRangesGroupedByState()
- for rngGrp in rngGrps.items():
- istrns = isfrom and (not (toStates is None)) and (not (options.debugString is None)) and rngGrp[0] in toStates
- di = closure.index(rngGrp[0])
- writer.append(f"{pfx}{spfx}{i}->{pfx}{spfx}{di} [label=\"")
- sb = ""
- notRanges = list(FA.__invertRanges(rngGrp[1]))
- if len(notRanges)*1.5 > len(rngGrp[1]):
- sb = FA.__appendRangesTo(rngGrp[1],sb)
- else:
- if len(notRanges) == 0:
- sb+="."
- else:
- sb+="^"
- sb = FA.__appendRangesTo(notRanges,sb)
- if len(sb) != 1 or sb == " ":
- writer.append('[')
- if len(sb) > 16:
- sb = sb[0:16]
- sb += "..."
- writer.append(f"{FA.__escapeLabel(sb)}]")
- else:
- writer.append(FA.__escapeLabel(sb))
- if istrns == False:
- writer.append("\"]")
- else:
- writer.append("\",color=green]\n")
- # do epsilons
- for fat in ffa.transitions:
- if fat.isEpsilon() == True:
- istrns = (not (toStates is None)) and (not (options.debugString is None)) and (ffa in toStates) and fat.to in toStates
- col = "gray"
- if istrns == True:
- col = "green"
- writer.append(f"{pfx}{spfx}{i}->{pfx}{spfx}{closure.index(fat.to)} [style=dashed,color={col}]\n")
- # do block ends
- bel = 0
- if not (options.blockEnds is None):
- bel = len(options.blockEnds)
- if hasBlockEnds == True and ffa.acceptSymbol != -1 and bel > ffa.acceptSymbol and (not (options.blockEnds[ffa.acceptSymbol] is None)):
- writer.append(f"{pfx}{spfx}{i}->{pfx}blockEnd{ffa.acceptSymbol}{spfx}0 [style=dashed,label=\".*?\"]\n")
- i += 1
- delim = ""
- if hasBlockEnds == True:
- bel = 0
- if not (options.blockEnds is None):
- bel = len(options.blockEnds)
- i = 0
- while i < bel:
- bfa = options.blockEnds[i]
- if not (bfa is None):
- bclose = bfa.fillClosure()
- delim = ""
- qi = 0
- while qi < len(bclose):
- cbfa = bclose[qi]
- rngGrps = cbfa.fillInputTransitionRangesGroupedByState(True)
- for rngGrp in rngGrps.items():
- di = bclose.index(rngGrp[0])
- writer.append(f"{pfx}blockEnd{i}{spfx}{qi}->{pfx}blockEnd{i}{spfx}{di} [label=\"")
- sb = FA.__appendRangesTo(rngGrp[1],sb)
- if len(sb) != 1 or sb == " ":
- middle = ""
- if len(sb) > 16:
- sb = sb[:16]
- middle = "..."
- writer.append(f"[{FA.__escapeLabel(sb)}{middle}]\"]\n")
- else:
- writer.append(f"{FA.__escapeLabel(sb)}\"]\n")
- # do epsilons
- for fat in cbfa.transitions:
- if fat.isEpsilon() == True:
- di = bclose.index(fat.to);
- writer.append(f"{pfx}blockEnd{i}{spfx}{qi}->{pfx}blockEnd{i}{spfx}{di} [style=dashed,color=gray]\n")
- qi += 1
- qi = 0
- while qi < len(bclose):
- cbfa = bclose[qi]
- writer.append(f"{pfx}blockEnd{i}{spfx}{qi} [label=<<TABLE BORDER=\"0\"><TR><TD>(be){spfx}<SUB>{qi}</SUB></TD></TR>")
- if cbfa.acceptSymbol!=-1 and options.hideAcceptSymbolIds == False:
- acc = None
- if (not (options.acceptSymbolNames is None)) and len(options.acceptSymbolNames) > i:
- acc = options.acceptSymbolNames[i]
- if acc is None:
- acc = str(i)
- acc2 = acc.replace("\"", """)
- writer.append(f"<TR><TD>{acc2}</TD></TR>")
- writer.append("</TABLE>>")
- if cbfa.acceptSymbol != -1:
- writer.append(",shape=doublecircle")
- elif cbfa.isFinal() == True or cbfa.isNeutral() == True:
- writer.append(",color=gray")
- writer.append("]\n")
- qi += 1
- i += 1
- delim = ""
- i = 0
- for ffa in closure:
- col = ""
- if not (options.debugString is None):
- if not (toStates is None):
- epstates= FA.fillEpsilonClosureAll(toStates)
- if ffa in epstates:
- col = "color=green,"
- elif ffa in epstates and (not (ffa in toStates)):
- col = "color=darkgreen,"
- else:
- col = "color=darkgreen,"
- writer.append(f"{pfx}{spfx}{i} [{col}label=<<TABLE BORDER=\"0\"><TR><TD>{spfx}<SUB>{i}</SUB></TD></TR>")
- if not (options.debugSourceNfa is None):
- fromStates = ffa.__fromStates
- if not (fromStates is None):
- brk = int(math.floor(math.sqrt(len(fromStates))))
- if len(fromStates) <= 3:
- brk = 3
- j = 0
- while j < len(fromStates):
- if j == 0:
- writer.append("<TR><TD>{ ")
- delim = ""
- elif (j % brk) == 0:
- delim = ""
- writer.append("</TD></TR><TR><TD>")
- fromFA = fromStates[j]
- writer.append(f"{delim}q<SUB>{options.debugSourceNfa.fillClosure().index(fromFA)}</SUB>")
- # putting a comma here is what we'd like
- # but it breaks dot no matter how its encoded
- delim = " "
- if j==len(fromStates)-1:
- writer.append(" }</TD></TR>")
- j += 1
- bel = 0
- if not (options.blockEnds is None):
- bel = len(options.blockEnds)
- if ffa.acceptSymbol != -1 and options.hideAcceptSymbolIds == False and not (hasBlockEnds == True and bel > ffa.acceptSymbol and (not (options.blockEnds[ffa.acceptSymbol] is None))):
- acc = None
- if not (options.acceptSymbolNames is None) and len(options.acceptSymbolNames) > ffa.acceptSymbol:
- acc = options.acceptSymbolNames[ffa.acceptSymbol]
- if acc is None:
- acc = str(ffa.acceptSymbol)
- acc2 = acc.replace("\"", """)
- writer.append(f"<TR><TD>{acc2}</TD></TR>")
- writer.append("</TABLE>>")
- isfinal = False
- if (ffa in accepting) and ((hasBlockEnds == False or len(bel) <= ffa.acceptSymbol or (options.blockEnds[ffa.acceptSymbol] is None))):
- writer.append(",shape=doublecircle")
- elif isfinal == True or ffa in neutrals:
- if ((fromStates is None) or not (ffa in fromStates)) and ((toStates is None) or not (ffa in toStates)):
- writer.append(",color=gray")
- writer.append("]\n")
- i+=1
- delim = ""
- if len(accepting) > 0:
- for ntfa in accepting:
- if hasBlockEnds == False or bel <= ntfa.acceptSymbol or (options.blockEnds is None) or (options.blockEnds[ntfa.acceptSymbol] is None):
- writer.append(f"{delim}{pfx}{spfx}{closure.index(ntfa)}")
- delim = ","
- if delim != "":
- writer.append(" [shape=doublecircle]\n")
- delim = ""
- if len(neutrals) > 0:
- for ntfa in neutrals:
- if ((fromStates is None) or (not (ntfa in fromStates))) and ((toStates is None) or (not (ntfa in toStates))):
- writer.append(f"{delim}{pfx}{spfx}{closure.index(ntfa)}")
- delim = ","
- writer.append(" [color=gray]\n")
- delim = ""
- delim = ""
- if len(finals) > 0:
- for ntfa in finals:
- writer.append(f"{delim}{pfx}{spfx}{closure.index(ntfa)}")
- delim = ","
- writer.append(" [shape=circle,color=gray]\n")
- writer.append("}\n")
- def __writeDot(self, writer: list[str], options: FADotGraphOptions | None = None) -> None:
- if (not (options is None)) and (not (options.debugSourceNfa is None)) and options.debugShowNfa == True:
- FA.__writeCompoundDotTo(self.fillClosure(), writer, options)
- else:
- FA.__writeDotTo(self.fillClosure(), writer, options)
- def renderToFile(self, path: str, options : FADotGraphOptions | None = None) -> None:
- if options is None:
- options = FADotGraphOptions()
- cmd: list[str] = ["dot"]
- arg: str = "-T"
- ext: str = os.path.splitext(path)[1].lower()
- dotwriter: list[str] = []
- match ext:
- case ".dot":
- self.__writeDot(dotwriter, options)
- output = open(path,"w")
- output.write("".join(dotwriter))
- output.close()
- return
- case ".png":
- arg += "png"
- case ".jpg"|".jpeg":
- arg += "jpg"
- case ".bmp":
- arg += "bmp"
- case ".svg":
- arg += "svg"
- case _:
- raise Exception("Unsupported output format")
- cmd.append(arg)
- if options.dpi > 0:
- cmd.append(f"-Gdpi={options.dpi}")
- self.__writeDot(dotwriter, options)
- cmd.append(f"-o{path}")
- p = Popen(cmd, stdout=None, stdin=PIPE, stderr=None, text=True)
- p.communicate(input="".join(dotwriter))[0]
- #fa = FA.parse("foo\\/(.*)",0)
- # nfa = FA.parse("[A-Z_a-z][0-9A-Z_a-z]*",0,False)
- # opts = FADotGraphOptions()
- # opts.hideAcceptSymbolIds = True
- # opts.debugShowNfa = True
- # opts.debugSourceNfa = nfa
- # opts.dpi = 300
- # opts.vertical = False
- # nfa.toDfa().renderToFile("test.jpg",opts)
- # compact = False
- # firstPart = FA.charset([FARange(ord("A"),ord("Z")),FARange(ord("a"),ord("z")),FARange(ord("_"),ord("_"))],0,compact)
- # nextPart = FA.charset([FARange(ord("A"),ord("Z")),FARange(ord("a"),ord("z")),FARange(ord("_"),ord("_")),FARange(ord("0"),ord("9"))],0,compact)
- # ident = FA.concat([firstPart,FA.repeat(nextPart,0,0,0,compact)],0,compact)
- # print(ident.toArray())
- # print(ident.toDfa().toArray())
- # print(ident.toMinimizedDfa().toArray())
- # print("PARSING ---------------")
- # ident = FA.parse("[A-Za-z_][A-Za-z_0-9]*",0,compact)
- # print(ident.toArray())
- # print(ident.toDfa().toArray())
- # print(ident.toMinimizedDfa().toArray())
Advertisement
Add Comment
Please, Sign In to add comment