Visual FA for Python (work in progress)

# Visual FA Python port
# copyright (c) 2025 by honey the codewitch
# MIT license
from __future__ import annotations
import math
import os
from subprocess import Popen, PIPE, STDOUT
from collections.abc import Iterator, Callable
class _ParseContext:
    def __init__(self, input, tabWidth = 4, position = 0, line = 1, column = 0):
        self.codepoint = -2
        self.position = 0
        self.line = 1
        self.column = 0
        self.tabWidth = 4
        self.captureBuffer = ""
        self.input = input
        self.tabWidth = tabWidth
        self.position = position
        self.line = line
        self.column = column

    def advance(self):
        if self.position >= len(self.input):
            self.codepoint = -1
        else:
            self.codepoint = ord(self.input[self.position])
        match self.codepoint:
            case 10:
                self.line += 1
                self.column = 0
            case 13:
                self.column = 0
            case 9:
                self.column = (((self.column - 1) / self.tabWidth) + 1) * self.tabWidth + 1
            case _:
                if self.codepoint > 31:
                    self.column += 1
        self.position += 1
        return self.codepoint

    def capture(self):
        self.captureBuffer += chr(self.codepoint)

    def ensureStarted(self):
        if self.codepoint == -2:
            self.advance()

    @staticmethod
    def _getCodepoints(values):
        result = []
        for v in values:
            result.append(ord(v))
        return result

    def _getErrorMessage(self, expecting):
        sb = ""
        match len(expecting):
            case 0:
                # do nothing
                sb = sb
            case 1:
                sb = ""
                if expecting[0] == -1:
                    sb += "end of input"
                else:
                    sb += "\""
                    sb += chr(expecting[0])
                    sb += "\""
            case 2:
                sb = ""
                if expecting[0] == -1:
                    sb += "end of input"
                else:
                    sb += "\""
                    sb += chr(expecting[0])
                    sb += "\""

                sb += " or "
                if expecting[1] == -1:
                    sb += "end of input"
                else:
                    sb += "\""
                    sb += chr(expecting[1])
                    sb += "\""
            case _: # length > 2
                sb = ""
                if expecting[0] == -1:
                    sb += "end of input"
                else:
                    sb += "\""
                    sb += chr(expecting[0])
                    sb += "\""

                l = len(expecting) - 1
                i = 1;
                while i < l:
                    sb += ", "
                    if -expecting[i] == -1:
                        sb += "end of input"
                    else:
                        sb += "\""
                        sb += chr(expecting[1])
                        sb += "\""

                    i += 1

                sb += ", or "
                if expecting[i] == -1:
                    sb += "end of input"
                else:
                    sb += "\""
                    sb += chr(expecting[i])
                    sb += "\""

        if self.codepoint == -1:
            if len(expecting) == 0:
                return "Unexpected end of input"
            return "Unexpected end of input. Expecting " + sb

        if len(expecting) == 0:
            return "Unexpected character \"" + chr(self.codepoint) + "\" in input"
        return "Unexpected character \"" + chr(self.codepoint) + "\" in input. Expecting " + sb

    def expecting(self, values):
        codepoints = _ParseContext._getCodepoints(values)
        if self.codepoint == -2:
            raise Exception(f"The cursor is before the beginning of the input at {self.position}");
        match len(codepoints):
            case 0:
                if self.codepoint == -1:
                    raise Exception(f"Unexpected end of input at {self.position}")
            case 1:
                if codepoints[0] != self.codepoint:
                    raise Exception(f"{self._getErrorMessage(codepoints)} at {self.position}")
            case _:
                if not (self.codepoint in codepoints):
                    raise Exception(f"{self._getErrorMessage(codepoints)} at {self.position}""IsLetter"] = FACharacterClasses.isLetter
            result["IsDigit"] = FACharacterClasses.isDigit
            result["IsLetterOrDigit"] = FACharacterClasses.isLetterOrDigit
            result["IsWhiteSpace"] = FACharacterClasses.isWhiteSpace
            result["alnum"] = FACharacterClasses.alnum
            result["alpha"] = FACharacterClasses.alpha
            result["cntrl"] = FACharacterClasses.cntrl
            result["digit"] = FACharacterClasses.digit
            result["graph"] = FACharacterClasses.graph
            result["ascii"] = FACharacterClasses.ascii
            result["blank"] = FACharacterClasses.blank
            result["lower"] = FACharacterClasses.lower
            result["print"] = FACharacterClasses.print_
            result["punct"] = FACharacterClasses.punct
            result["space"] = FACharacterClasses.space
            result["upper"] = FACharacterClasses.upper
            result["word"] = FACharacterClasses.word
            result["xdigit"] = FACharacterClasses.xdigit
            FACharacterClasses.__known = result
        return FACharacterClasses.__known

class _ExpEdge:
    def __init__(self):
        self.exp = None
        self.fromState = None
        self.toState = None

class _FListNode:
    def __init__(self, q, sl):
        self.nextNode = None
        self.prevNode = None
        self.state = q
        self.stateList = sl
        if (sl.count == 0):
            sl.count += 1
            sl.first = self
            sl.last = self
        else:
            sl.count += 1
            sl.last.nextNode = self
            self.prevNode = sl.last
            sl.last = self

    def remove(self):
        self.stateList.count -= 1
        if self.stateList.first is self:
            self.stateList.first = self.nextNode
        else:
            self.prevNode.nextNode = self.nextNode

        if self.stateList.last is self:
            self.stateList.last = self.prevNode
        else:
            self.nextNode.prevNode = self.prevNode

class _FList:
    def __init__(self):
        self.count = 0
        self.first = None
        self.last = None

    def add(self, q):
        return _FListNode(q, self)

class _FKeyPair:
    def __init__(self, key, value):
        self.key = key
        self.value = value

class FADotGraphOptions:
    def __init__(self):
        self.dpi: int = 300
        self.debugString: str | None = None
        self.blockEnds: list[FA] | None = None
        self.debugSourceNfa: FA | None = None
        self.debugShowNfa: bool = False
        self.hideAcceptSymbolIds: bool = False
        self.acceptSymbolNames: list[str] | None = None
        self.vertical: bool = False
        self.statePrefix: str = "q"

class FAProgress:
    def __init__(self, callback = None):
        self.value: int = 0
        self._callback = callback

    def report(self, value: int) -> None:
        self.value = value
        if self._callback != None:
            self._callback(value)

class FARange:
    def __init__(self, min : int = -1, max : int = -1):
        self.min = min
        self.max = max
    def __lt__(self,rhs):
        if self.max == rhs.max:
            return self.min < rhs.min
        return self.max < rhs.max
    def intersects(self, rhs: FARange):
        return (rhs.min >= self.min and rhs.min <= self.max) or (rhs.max >= self.min and rhs.max <= self.max)
    @staticmethod
    def toUnpacked(packedRanges : list[int]) -> list[FARange]:
        result = []
        length = len(packedRanges)/2
        i = 0
        while i < length:
            j = i * 2
            result.append(FARange(packedRanges[j], packedRanges[j + 1]))

            i += 1
        return result
    @staticmethod
    def toPacked(pairs : Iterator[FARange]) -> list[int]:
        result = []
        for pair in pairs:
            result.append(pair.min)
            result.append(pair.max)
        return result

class FATransition:
    def __init__(self, to: FA, min : int = -1, max : int = -1):
        self.to: FA = to
        self.min: int = min
        self.max: int = max

    def isEpsilon(self) -> bool:
        return self.min == -1 or self.max == -1

    def __lt__(self,rhs):
        if self.max == rhs.max:
            return self.min < rhs.min
        return self.max < rhs.max

class FA:
    def __init__(self, acceptSymbol : int = -1):
        self.acceptSymbol: int = acceptSymbol
        self.transitions: list[FATransition] = []
        self.__minimizationTag: int = -1
        self.__isDeterministic: bool = True
        self.__isCompact: bool = True
        self.id: int = -1
        self.__fromStates: list[FA] = []
    def __str__(self):
        if self.id == -1:
            return "FA"
        return f"q{self.id}"

    def isCompact(self) -> bool:
        return self.__isCompact

    def isDeterministic(self) -> bool:
        return self.__isDeterministic

    def isAccepting(self) -> bool:
        return self.acceptSymbol != -1

    def isFinal(self) -> bool:
        return len(self.transitions) == 0

    def isNeutral(self) -> bool:
        return self.acceptSymbol == -1 and len(self.transitions) == 1 and self.transitions[0].isEpsilon()

    def isTrap(self) -> bool:
        return self.acceptSymbol == -1 and len(self.transitions) == 0

    def addEpsilon(self, to: FA, compact : bool = True) -> None:
        if compact == True:
            i: int = 0
            while i < len(to.transitions):
                fat = to.transitions[i]
                if fat.isEpsilon() == False:
                    self.addTransition(FARange(fat.min, fat.max), fat.to, True)
                else:
                    self.addEpsilon(fat.to, True)
                i += 1
            if self.acceptSymbol < 0 and to.acceptSymbol > -1:
                self.acceptSymbol = to.acceptSymbol
        else:
            found: bool = False
            i: int = 0
            while i < len(to.transitions):
                fat = to.transitions[i]
                if fat.isEpsilon() == False:
                    break
                if fat.to is to:
                    found = True
                    break
                i += 1
            if found == False:
                self.transitions.insert(i,FATransition(to))
                self.__isCompact = False
                self.__isDeterministic = False

    def addTransition(self, rng: FARange, to: FA, compact: bool = True) -> None:
        if rng.min == -1 and rng.max == -1:
            self.addEpsilon(to, compact)
            return
        if rng.min > rng.max:
            tmp = rng.min
            rng.min = rng.max
            rng.max = tmp
        i: int = 0
        insert: int = -1
        while i < len(self.transitions):
            fat = self.transitions[i]
            if to is fat.to and rng.min == fat.min and rng.max == fat.max:
                return
            if self.__isDeterministic:
                if rng.intersects(FARange(fat.min, fat.max)):
                    self.__isDeterministic = False
            if rng.max > fat.max:
                insert = i
            if self.__isDeterministic == False and rng.max < fat.min:
                break
            i += 1
        self.transitions.insert(insert+1,FATransition(to, rng.min, rng.max))

    def clearTransitions(self) -> None:
        self.transitions.clear()
        self.__isDeterministic = True
        self.__isCompact = True

    @staticmethod
    def getFirstAcceptSymbol(states: list[FA]) -> int:
        for state in states:
            if state.acceptSymbol != -1:
                return state.acceptSymbol
        return -1

    def fillClosure(self, result: list[FA] | None = None) -> list[FA]:
        if result is None:
            result = []
        if self in result:
            return result
        result.append(self)
        i = 0
        while i < len(self.transitions):
            trn = self.transitions[i]
            trn.to.fillClosure(result)
            i += 1
        return result

    def fillEpsilonClosure(self, result : list[FA] | None = None) -> list[FA]:
        if result is None:
            result = []
        if self in result:
            return result
        result.append(self)
        if self.__isCompact == True:
            return result
        for trn in self.transitions:
            if trn.isEpsilon() == True:
                if trn.to.__isCompact == True:
                    if not (trn.to in result):
                        result.append(trn.to)
                else:
                    trn.to.fillEpsilonClosure(result)
            else:
                break
        return result

    @staticmethod
    def fillEpsilonClosureAll(states: list[FA], result: list[FA] | None = None) -> list[FA]:
        if result is None:
            result = []
        for state in states:
            state.fillEpsilonClosure(result)
        return result

    def clone(self) -> FA:
        closure = self.fillClosure()
        nclosure: list[FA] = []
        for cfa in closure:
            nfa = FA(cfa.acceptSymbol)
            nfa.__isDeterministic = cfa.__isDeterministic
            nfa.__isCompact = cfa.__isCompact
            nfa.__minimizationTag = cfa.__minimizationTag
            nfa.__fromStates = cfa.__fromStates.copy()
            nfa.id = cfa.id
            nclosure.append(nfa)
        i: int = 0
        while i< len(nclosure):
            cfa = closure[i]
            nfa = nclosure[i]
            j: int = 0
            for fat in cfa.transitions:
                nfa.transitions.append(FATransition(nclosure[closure.index(fat.to)], fat.min, fat.max))
            i += 1
        return nclosure[0]

    def totalize(self, closure: list[FA] | None = None) -> None:
        if closure is None:
            closure = self.fillClosure()

        s = FA()
        s.transitions.append(FATransition(s, 0, 0x10ffff))
        for p in closure:
            maxi = 0
            sortedTrans = sorted(p.transitions)
            for t in sortedTrans:
                if t.isEpsilon() == False:
                    if t.min > maxi:
                        p.transitions.append(FATransition(s, maxi, t.min - 1))
                    if t.max + 1 > maxi:
                        maxi = t.max + 1

            if maxi <= 0x10ffff:
                p.transitions.append(FATransition(s, maxi, 0x10ffff))

    @staticmethod
    def _determinize(target: FA, progress: FAProgress = None) -> None:
        target.setIds()
        if progress is None:
            progress = FAProgress()
        prog = 0
        id = 0
        progress.report(0)
        closure = target.fillClosure()
        p = set()
        for ffa in closure:
            p.add(0)
            for trn in ffa.transitions:
                if trn.isEpsilon() == False:
                    p.add(trn.min)
                    if trn.max < 0x10ffff:
                        p.add(trn.max + 1)

        points = sorted(p)

        prog += 1
        progress.report(prog)
        sets = dict() # new Dictionary<_KeySet<FA>, _KeySet<FA>>()
        working = [] # new Queue<_KeySet<FA>>();
        dfaMap =  dict() # new Dictionary<_KeySet<FA>, FA>()
        epscl = []
        ecs = [] # new List<FA>()
        efcs = None # List<FA>
        epscl = []
        target.fillEpsilonClosure(epscl)
        i = 0
        initial = list()
        while i < len(epscl):
            efa = epscl[i]
            initial.append(efa)
            i += 1
        fsinit = frozenset(initial)
        sets[fsinit]= initial

        working.append(initial)
        result = FA()
        result.__fromStates = list(initial)
        result.id = id
        id += 1
        result.__isDeterministic = True
        for afa in initial:
            if afa.acceptSymbol != -1:
                result.acceptSymbol = afa.acceptSymbol
                break
        prog+=1
        progress.report(prog)
        # powerset/subset construction
        dfaMap[frozenset(initial)]= result
        while len(working) > 0:
            # get the nextNode set
            s = working.pop(0)
            # get the nextNode DFA out of the map
            # of (NFA states)->(dfa state)
            dfa = dfaMap.get(frozenset(s))
            # find the first accepting
            # state if any, and assign
            # it to the new DFA
            for q in s:
                if q.acceptSymbol!=-1:
                    dfa.acceptSymbol = q.acceptSymbol
                    break
            # for each range in the input alphabet
            i = 0
            while i < len(points):
                pnt = points[i]
                dststates = list()
                for c in s:
                    # TODO: Might be able to eliminate the
                    # epsilon closure here. Needs testing
                    ecs.clear()
                    if c.__isCompact == False:
                        c.fillEpsilonClosure(ecs)
                    else:
                        ecs.append(c)
                    j = 0
                    while j < len(ecs):
                        efa = ecs[j]
                        # basically if we intersect somewhere on the input alphabet,
                        # which we should, then we add the destination state(s) to the set
                        k = 0
                        while k < len(efa.transitions):
                            trns = efa.transitions[k]
                            # skip the epsilon closure
                            # we don't need it
                            if trns.isEpsilon() == True:
                                k += 1
                                continue
                            # TODO: can probably early out here
                            # if pnt > trns.Max?
                            if trns.min <= pnt and pnt <= trns.max:
                                if trns.to.__isCompact == True:
                                    if not (trns.to in dststates):
                                        dststates.append(trns.to)
                                else:
                                    if efcs is None:
                                        efcs = []
                                    efcs.clear()
                                    trns.to.fillEpsilonClosure(efcs)
                                    m = 0
                                    while m < len(efcs):
                                        if not (efcs[m] in dststates):
                                            dststates.append(efcs[m])
                                        m += 1
                            k+= 1
                        j +=1
                # is this a new set or an
                # existing one?
                fset = frozenset(dststates)
                if not (fset in sets):
                    sets[fset]=dststates
                    # add another work item
                    working.append(dststates)
                    # make a new DFA state
                    newfa = FA()
                    newfa.__fromStates = list(dststates)
                    newfa.id = id
                    id += 1
                    dfaMap[fset]= newfa

                dst = dfaMap.get(fset)
                # find the first and last range to insert
                first = pnt
                last = -1
                if i + 1 < len(points):
                    last = (points[i + 1] - 1)
                else:
                    last = 0x10ffff
                # this should already be in sorted order
                # otherwise we'd use addTransition()
                dfa.transitions.append(FATransition(dst, first, last))
                prog+=1
                progress.report(prog)
                i += 1

            prog+=1
            progress.report(prog)
        # remove dead transitions (destinations with no accept)
        for ffa in result.fillClosure():
            itrns = list(ffa.transitions)
            ffa.transitions.clear()
            for trns in itrns:
                if FA.getFirstAcceptSymbol(trns.to.fillClosure()) != -1:
                    ffa.transitions.append(trns)
            prog += 1
            progress.report(prog)

        prog += 1
        progress.report(prog)
        return result

    def toDfa(self, progress: FAProgress | None = None) -> FA:
        return self._determinize(self, progress)

    def _step(self, input: int) -> FA | None:
        ic = len(self.transitions)
        i = 0
        while i < ic:
            t = self.transitions[i]
            if input >= t.min and input <= t.max:
                return t.to
            i += 1
        return None

    @staticmethod
    def _minimize(a : FA, progress: FAProgress | None) -> FA:
        prog = 0
        if progress is None:
            progress = FAProgress()
        progress.report(prog)
        a = a.toDfa(progress)
        tr = a.transitions
        if len(tr) == 1:
            t = tr[0]
            if t.to is a and t.min == 0 and t.max == 0x10ffff:
                return a

        a.totalize()
        prog += 1
        progress.report(prog)
        cl = a.fillClosure()
        states = []
        mtag = 0
        for q in cl:
            q.__minimizationTag = mtag
            states.append(q)
            mtag += 1

        pp = []
        ic = len(cl)
        i = 0
        while i < ic:
            ffa = cl[i]
            pp.append(0)
            for t in ffa.transitions:
                pp.append(t.min)
                if t.max < 0x10ffff:
                    pp.append(t.max + 1)
            i += 1

        sigma = list(pp)
        sigma.sort()
        # initialize the data structures
        reverse = []
        for s in states:
            arr = [None] * len(sigma)
            reverse.append(arr)

        prog += 1
        progress.report(prog)
        reverseNonempty = []
        i = 0
        while i < len(states):
            arr = [False]*len(sigma)
            reverseNonempty.append(arr)
            i += 1

        partition = [None]*len(states)
        prog += 1
        progress.report(prog)
        block = [None]*len(states)
        active = []
        i = 0
        while i < len(states):
            arr = [None]*len(sigma)
            active.append(arr)
            i += 1

        active2 = []
        i = 0
        while i < len(states):
            arr = [None]*len(sigma)
            active2.append(arr)
            i += 1

        pending = []
        pending2 = []
        i = 0
        while i < len(sigma):
            arr = [False]*len(states)
            pending2.append(arr)
            i+= 1
        split = []
        split2 = [False]*len(states)
        refine = []
        refine2 = [False]*len(states)
        splitBlock = [None] * len(states)
        prog += 1
        progress.report(prog)
        q = 0
        while q < len(states):
            splitBlock[q] = []
            partition[q] = []
            x = 0
            while x < len(sigma):
                reverse[q][x] = []
                active[q][x]= _FList()
                x += 1
            q += 1
        # Find the initial partition and reverse edges
        for qq in states:
            j = 0
            if qq.acceptSymbol == -1:
                j = 1
            partition[j].append(qq)
            block[qq.__minimizationTag] = j
            x = 0
            while x < len(sigma):
                y = sigma[x]
                p = qq._step(y)
                pn = p.__minimizationTag
                if reverse[pn] != None and reverse[pn][x] != None:
                    reverse[pn][x].append(qq)
                reverseNonempty[pn][x] = True
                x += 1

            prog += 1
            progress.report(prog)

        # initialize the active sets
        j = 0
        while j <= 1:
            x = 0
            while x < len(sigma):
                part = partition[j]
                for qq in part:
                    if reverseNonempty[qq.__minimizationTag][x] == True:
                        active2[qq.__minimizationTag][x] = active[j][x].add(qq)
                x += 1
                prog += 1
                progress.report(prog)
            j += 1

        # initialize pending
        x = 0
        while x < len(sigma):
            a0 = active[0][x].count
            a1 = active[1][x].count
            j = 1
            if a0 <= a1:
                j = 0
            pending.append(_FKeyPair(j, x))
            pending2[x][j] = True
            x += 1

        # process pending until fixed point
        k = 2
        while len(pending) > 0:
            ip = pending.pop(0)
            p = ip.key
            x = ip.value
            pending2[x][p] = False
            m = active[p][x].first
            while m != None:
                for s in reverse[m.state.__minimizationTag][x]:
                    if split2[s.__minimizationTag] == False:
                        split2[s.__minimizationTag] = True
                        split.append(s)
                        j = block[s.__minimizationTag]
                        splitBlock[j].append(s)
                        if refine2[j] != True:
                            refine2[j] = True
                            refine.append(j)
                m = m.nextNode

            prog += 1
            progress.report(prog)
            # refine blocks
            for j in refine:
                if len(splitBlock[j]) < len(partition[j]):
                    b1 = partition[j]
                    b2 = partition[k]
                    e = splitBlock[j]
                    for s in e:
                        b1.pop(b1.index(s)) # b1.splice(b1.indexOf(s), 1);
                        b2.append(s)
                        block[s.__minimizationTag] = k
                        c = 0
                        while c < len(sigma):
                            sn = active2[s.__minimizationTag][c]
                            if sn != None and sn.stateList is active[j][c]:
                                sn.remove()
                                active2[s.__minimizationTag][c] = active[k][c].add(s)
                            c += 1

                    # update pending
                    c = 0
                    while c < len(sigma):
                        aj = active[j][c].count
                        ak = active[k][c].count
                        if pending2[c][j]==False and 0 < aj and aj <= ak:
                            pending2[c][j] = True
                            pending.append(_FKeyPair(j, c))
                        else:
                            pending2[c][k] = True
                            pending.append(_FKeyPair(k, c))
                        c += 1
                    k+= 1
                sbj = splitBlock[j]
                for s in sbj:
                    split2[s.__minimizationTag] = False
                refine2[j] = False
                splitBlock[j].clear()
                prog += 1
                progress.report(prog)

            split.clear()
            refine.clear()
        prog += 1
        progress.report(prog)
        # make a new state for each equiv class, set initial state
        newstates = []
        n = 0
        while n < k:
            s = FA()
            s.__isDeterministic = False
            newstates.append(s)
            pn = partition[n]
            for q in pn:
                if q is a:
                    a = s
                s.id = q.id
                s.acceptSymbol = q.acceptSymbol
                s.__minimizationTag = q.__minimizationTag
                q.__minimizationTag = n
            prog += 1
            progress.report(prog)
            n += 1
        # build transitions and set acceptance
        for s in newstates:
            st = states[s.__minimizationTag]
            s.acceptSymbol = st.acceptSymbol
            for t in st.transitions:
                s.transitions.append(FATransition(newstates[t.to.__minimizationTag], t.min, t.max))
            prog += 1
            progress.report(prog)

        # remove dead transitions
        for ffa in a.fillClosure():
            itrns = list(ffa.transitions)
            ffa.transitions.clear()
            for trns in itrns:
                if -1 != FA.getFirstAcceptSymbol(trns.to.fillClosure()):
                    ffa.transitions.append(trns)
        return a

    def toMinimizedDfa(self, progress: FAProgress | None = None) -> FA:
        return FA._minimize(self, progress)

    @staticmethod
    def _concat(lhs: FA, rhs: FA, compact: bool) -> FA:
        acc = filter(lambda value: value.isAccepting(),lhs.fillClosure())
        for afa in acc:
            afa.acceptSymbol = -1
            afa.addEpsilon(rhs, compact)

    def toCompact(self) -> FA:
        result = self.clone()
        closure = result.fillClosure()
        done = False
        while done == False:
            done = True
            i = 0
            while i < len(closure):
                fa = closure[i]
                j = 0
                while j < len(fa.transitions):
                    fat = fa.transitions[j]
                    if fat.isEpsilon():
                        del fa.transitions[j]
                        j -=1
                        fa.addEpsilon(fat.to)
                        done = False
                        break
                    j += 1
                i += 1
            if done == False:
                closure = closure[0].fillClosure()
                break
            fa.__isCompact = True
        return result

    def setIds(self) -> None:
        i = 0
        for fa in self.fillClosure():
            fa.id = i
            i += 1
    @staticmethod
    def literal(codepoints: Iterator[int], accept: int = 0, compact: bool = True) -> FA:
        result = FA()
        current: FA = result
        for cp in codepoints:
            current.acceptSymbol = -1
            newFa = FA()
            newFa.acceptSymbol = accept
            current.addTransition(FARange(cp, cp), newFa, compact)
            current = newFa
        return result
    @staticmethod
    def charset(ranges: Iterator[FARange], accept: int = 0, compact: bool = True) -> FA:
        result = FA()
        final = FA(accept)
        l = sorted(ranges)
        for rng in l:
            result.addTransition(rng,final,compact)
        return result
    @staticmethod
    def concat(exprs: Iterator[FA], accept: int = 0, compact: bool = True) -> FA:
        result = None
        left = None
        right = None
        for expr in exprs:
            nval = expr.clone()
            if left is None:
                if result is None:
                    result = nval
                left = nval
                continue
            if right is None:
                right = nval

            nval = right.clone()
            FA._concat(left, nval, compact)
        target = result
        if right != None:
            target = right

        cl = target.fillClosure()
        for cfa in cl:
            if cfa.acceptSymbol != -1:
                cfa.acceptSymbol = accept
        if result is None:
            return FA(accept)
        return result
    @staticmethod
    def union(exprs: Iterator[FA], accept: int = 0, compact: bool = True) -> FA:
        result = FA()
        final = FA()
        final.acceptSymbol = accept
        for expr in exprs:
            nfa = expr.clone()
            nacc = nfa.fillClosure()
            for afa in nacc:
                if afa.acceptSymbol != -1:
                    afa.acceptSymbol = -1
                    afa.addEpsilon(final, compact)
            result.addEpsilon(nfa, compact)
        return result
    @staticmethod
    def optional(expr: Iterator[FA], accept: int = 0, compact: bool = True) -> FA:
        result = expr.clone()
        acc = result.fillClosure()
        for afa in acc:
            if afa.acceptSymbol != -1:
                afa.acceptSymbol = accept
                result.addEpsilon(afa, compact)
        return result
    @staticmethod
    def repeat(expr: FA, minOccurs: int = 0, maxOccurs: int = 0, accept: int = 0, compact: bool = True) -> FA:
        expr = expr.clone()
        if minOccurs > 0 and maxOccurs > 0 and minOccurs > maxOccurs:
            raise Exception("Minimum is greater than maximum")
        result = None
        match minOccurs:
            case -1|0:
                match maxOccurs:
                    case -1|0:
                        result = FA()
                        result.acceptSymbol = accept
                        result.addEpsilon(expr, compact)
                        for afa in expr.fillClosure():
                            if afa.acceptSymbol != -1:
                                afa.addEpsilon(result, compact)
                        return result
                    case 1:
                        result = FA.optional(expr, accept, compact)
                        return result
                    case _:
                        l = []
                        expr = FA.optional(expr, accept, compact)
                        l.append(expr)
                        i = 1
                        while i < maxOccurs:
                            l.append(expr.clone())
                            i += 1
                        result = FA.concat(l, accept, compact)
                        return result
            case 1:
                match maxOccurs:
                    case -1|0:
                        result = FA.concat([expr, FA.repeat(expr, 0, 0, accept, compact)], accept, compact)
                        return result
                    case 1:
                        return expr
                    case _:
                        result = FA.concat([expr, FA.repeat(expr, 0, maxOccurs - 1, accept, compact)], accept, compact)
                        return result
            case _:
                match maxOccurs:
                    case -1|0:
                        result = FA.concat([FA.repeat(expr, minOccurs, minOccurs, accept, compact), FA.repeat(expr, 0, 0, accept, compact)], accept, compact)
                        return result
                    case 1:
                        raise Exception("Should never get here")
                    case _:
                        if minOccurs == maxOccurs:
                            l = []
                            expr = FA.optional(expr, accept, compact)
                            l.append(expr)
                            i = 1
                            while i < maxOccurs:
                                l.append(expr.clone())
                                i += 1
                            result = FA.concat(l, accept, compact)
                            return result
                        result = FA.concat([FA.repeat(expr, minOccurs, minOccurs, accept, compact), FA.repeat(FA.optional(expr, accept, compact), maxOccurs - minOccurs, maxOccurs - minOccurs, accept, compact)], accept, compact)
                        return result

    def fillInputTransitionRangesGroupedByState(self, includeEpsilonTransitions: bool = False, result: dict[FA,list[FARange]] = None) -> dict[FA,list[FARange]]:
        if result is None:
            result = dict()

        for fat in self.transitions:
            if includeEpsilonTransitions or fat.isEpsilon()==False:
                res = result.get(fat.to,None)
                if res is None:
                    ndata = [FARange(fat.min, fat.max)]
                    result[fat.to]= ndata
                else:
                    res.append(FARange(fat.min, fat.max))

        for values in result.values():
            values.sort()
        return result

    @staticmethod
    def toUtf32(text: str) -> Iterator[int]:
        for s in text:
            yield ord(s)

    def toArray(self) -> list[int]:
        fa = self
        result: list[int] = []
        closure = fa.fillClosure()
        stateIndices: list[int] = []
        i: int = 0
        while i < len(closure):
            cfa = closure[i]
            stateIndices.append(len(result))
            result.append(cfa.acceptSymbol)
            itrgp = cfa.fillInputTransitionRangesGroupedByState(True)
            result.append(len(itrgp))
            for itr in itrgp.items():
                result.append(closure.index(itr[0]))
                result.append(len(itr[1]))
                for pack in FARange.toPacked(itr[1]):
                    result.append(pack)
            i += 1
        state: int = 0
        while state < len(result):
            state += 1
            tlen = result[state]
            state += 1
            i = 0
            while i < tlen:
                result[state] = stateIndices[result[state]]
                state += 1
                prlen = result[state]
                state += 1
                state += prlen * 2
                i += 1
        return result

    @staticmethod
    def fromArray(arr: list[int]) -> FA:
        if len(arr)==0:
            return FA()

        si = 0
        states = dict()
        while si < len(arr):
            newFa = FA()
            states[si]= newFa
            newFa.acceptSymbol = arr[si]
            si += 1
            tlen = arr[si]
            si += 1
            i = 0
            while i < tlen:
                si += 1
                prlen = arr[si]
                si += 1
                si += prlen * 2
                i += 1

        si = 0
        while si < len(arr):
            state = states.get(si)
            si += 1
            tlen = arr[si]
            si += 1
            i = 0
            while i < tlen:
                tto = arr[si]
                si += 1
                prlen = arr[si]
                si += 1
                to = states.get(tto)
                j = 0
                while j < prlen:
                    min = arr[si]
                    si += 1
                    max = arr[si]
                    si += 1
                    state.addTransition(FARange(min, max), to)
                    j += 1
                i += 1
        return states.get(0)

    def toLexer(tokens: Iterator[FA], makeDfa: bool = True, compact: bool = True, progress: FAProgress | None = None) -> FA:
        if makeDfa == True:
            i: int = 0
            while i < len(tokens):
                tokens[i] = tokens[i].toMinimizedDfa(progress)
                i += 1
        result = FA()
        if makeDfa == True:
            for token in tokens:
                result.addEpsilon(token.toMinimizedDfa(progress), True)
        else:
            for token in tokens:
                result.addEpsilon(token, compact)
        if makeDfa == True:
            return result.toDfa(progress)
        else:
            return result

    @staticmethod
    def fillMove(states: list[FA], codepoint: int, result: list[FA] | None = None) -> list[FA]:
        if result is None:
            result = []
        for state in states:
            for fat in state.transitions:
                # epsilon dsts should already be in states:
                if fat.isEpsilon():
                    continue
                if codepoint < fat.min:
                    break
                if codepoint <= fat.max:
                    fat.to.fillEpsilonClosure(result)
        return result

    @staticmethod
    def __normalizeSortedRangeList(pairs):
        i = 1
        while i < len(pairs):
            if pairs[i - 1].max + 1 >= pairs[i].min:
                nr = FARange(pairs[i - 1].min, pairs[i].max)
                pairs[i - 1] = nr
                del pairs[i]
                i -= 1 # compensated for by i += 1 in for loop
            i += 1
        i = 1
    @staticmethod
    def __fromHexChar(h):
        if ord(":") > h and ord("/") < h:
            return h - ord("0")
        if ord("G") > h and ord("@") < h:
            return h - ord("7") # 'A'-10
        if ord("g") > h and ord("`") < h:
            return h - ord("W") # 'a'-10
        raise Exception("The value was not hex.")

    @staticmethod
    def __isHexChar(h):
        if ord(":") > h and ord("/") < h:
            return True

        if ord("G") > h and ord("@") < h:
            return True

        if ord("g") > h and ord("`") < h:
            return True
        return False

    @staticmethod
    def __parseModifier(expr, pc, accept, compact):
        if pc.codepoint == -1:
            return expr
        position = pc.position
        match chr(pc.codepoint):
            case "*":
                expr = FA.repeat(expr, 0, 0, accept, compact)
                pc.advance()
            case "+":
                expr = FA.repeat(expr, 1, 0, accept, compact)
                pc.advance()
            case "?":
                expr = FA.optional(expr, accept, compact)
                pc.advance()
            case "{":
                pc.advance()
                pc.trySkipWhiteSpace()
                pc.expecting(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ',', '}'])
                min = -1
                max = -1
                if ord(",") != pc.codepoint and ord("}") != pc.codepoint:
                    l = len(pc.captureBuffer)
                    if pc.tryReadDigits() == False:
                        pc.expecting(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
                    min = int(pc.captureBuffer[l:])
                    pc.trySkipWhiteSpace()

                if ord(",") == pc.codepoint:
                    pc.advance()
                    pc.trySkipWhiteSpace()
                    pc.expecting(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '}'])
                    if ord("}") != pc.codepoint:
                        l = len(pc.captureBuffer)
                        pc.tryReadDigits()
                        max = int(pc.captureBuffer[l:])
                        pc.trySkipWhiteSpace()
                else:
                    max = min
                pc.expecting(['}'])
                pc.advance()
                expr = FA.repeat(expr, min, max, accept, compact)
        return expr
    @staticmethod
    def __parseEscapePart(pc):
        if -1 == pc.codepoint:
            return -1
        match chr(pc.codepoint):
            case "f":
                pc.advance()
                return ord("\f")
            case "v":
                pc.advance()
                return ord("\v")
            case "t":
                pc.advance()
                return ord("\t")
            case "n":
                pc.advance()
                return ord("\n")
            case "r":
                pc.advance()
                return ord("\r")
            case "x":
                if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
                    return ord("x")
                b = FA.__fromHexChar(pc.codepoint)
                if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
                    return b
                b <<= 4
                b |= FA.__fromHexChar(pc.codepoint)
                if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
                    return b
                b <<= 4
                b |= FA.__fromHexChar(pc.codepoint)
                if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
                    return b
                b <<= 4
                b |= FA.__fromHexChar(pc.codepoint)
                return b
            case "u":
                if -1 == pc.advance():
                    return ord("u")
                u = FA.__fromHexChar(pc.codepoint)
                u <<= 4
                if -1 == pc.advance():
                    return u
                u |= FA.__fromHexChar(pc.codepoint)
                u <<= 4
                if -1 == pc.advance():
                    return u
                u |= FA.__fromHexChar(pc.codepoint)
                u <<= 4
                if -1 == pc.advance():
                    return u
                u |= FA.__fromHexChar(pc.codepoint)
                return u
            case _:
                i = pc.codepoint
                pc.advance()
                return i

    @staticmethod
    def __parseRangeEscapePart(pc):
        if pc.codepoint == -1:
            return -1
        match chr(pc.codepoint):
            case "0":
                pc.advance()
                return ord("\0")
            case "f":
                pc.advance()
                return ord("\f")
            case "v":
                pc.advance()
                return ord("\v")
            case "t":
                pc.advance()
                return ord("\t")
            case "n":
                pc.advance()
                return ord("\n")
            case "r":
                pc.advance()
                return ord("\r")
            case "x":
                if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
                    return ord("x")
                b = FA.__fromHexChar(pc.codepoint)
                if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
                    return b
                b <<= 4
                b |= FA.__fromHexChar(pc.codepoint)
                if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
                    return b
                b <<= 4
                b |= FA.__fromHexChar(pc.codepoint)
                if -1 == pc.advance() or FA.__isHexChar(pc.codepoint) == False:
                    return b
                b <<= 4
                b |= FA.__fromHexChar(pc.codepoint)
                return b
            case "u":
                if -1 == pc.advance():
                    return ord("u")
                u = FA.__fromHexChar(pc.codepoint)
                u <<= 4
                if -1 == pc.advance():
                    return u
                u |= FA.__fromHexChar(pc.codepoint)
                u <<= 4
                if -1 == pc.advance():
                    return u
                u |= FA.__fromHexChar(pc.codepoint)
                u <<= 4
                if -1 == pc.advance():
                    return u
                u |= FA.__fromHexChar(pc.codepoint)
                return u
            case _:
                i = pc.codepoint
                pc.advance()
                return i

    @staticmethod
    def __parseSet(pc):
        result = []
        pc.ensureStarted()
        pc.expecting(['['])
        pc.advance()
        pc.expecting([])
        firstRead = True
        firstChar = 0
        readFirstChar = False
        wantRange = False
        isNot = False
        if ord("^") == pc.codepoint:
            isNot = True
            pc.advance()
            pc.expecting([])

        while -1 != pc.codepoint and (firstRead == True or ord("]") != pc.codepoint):
            if wantRange == False:
                # can be a single char,
                # a range
                # or a named character class
                if ord("[") == pc.codepoint: # named char class
                    epos = pc.position
                    pc.advance()
                    pc.expecting([])
                    if ord(":") != pc.codepoint:
                        firstChar = ord("[")
                        readFirstChar = True
                    else:
                        firstRead = False
                        pc.advance()
                        pc.expecting([])
                        ll = len(pc.captureBuffer)
                        if pc.tryReadUntil(ord(":"), False) == False:
                            raise Exception(f"Expecting character class at {pc.position}")
                        pc.expecting([':'])
                        pc.advance()
                        pc.expecting([']'])
                        pc.advance()
                        cls = pc.captureBuffer[ll:]
                        ranges = FACharacterClasses.known().get(cls,None);
                        if ranges==None:
                            raise Exception(f"Unknown character class \"{cls}\" specified at {epos}")

                        for rng in FARange.toUnpacked(ranges):
                            result.append(rng)

                        readFirstChar = False
                        wantRange = False
                        firstRead = False
                        continue
                if readFirstChar == False:
                    if ord("\\") == pc.codepoint:
                        pc.advance()
                        firstChar = FA.__parseRangeEscapePart(pc)
                    else:
                        firstChar = pc.codepoint
                        pc.advance()
                        pc.expecting([])
                    readFirstChar = True
                else:
                    if ord("-") == pc.codepoint:
                        pc.advance()
                        pc.expecting([])
                        wantRange = True
                    else:
                        result.append(FARange(firstChar, firstChar))
                        readFirstChar = False
                firstRead = False
            else:
                if ord("\\") != pc.codepoint:
                    ch = pc.codepoint
                    pc.advance()
                    pc.expecting([])
                    result.append(FARange(firstChar, ch))
                else:
                    min = firstChar
                    pc.advance()
                    result.append(FARange(min, FA.__parseRangeEscapePart(pc)))

                wantRange = False
                readFirstChar = False

        if readFirstChar == True:
            result.append(FARange(firstChar, firstChar))
            if wantRange == True:
                result.append(FARange(ord("-"),ord("-")))

        pc.expecting(["]"])
        pc.advance()
        return (isNot, result)
    @staticmethod
    def __invertRanges(ranges: list[FARange]):
        if ranges is None:
            return
        last = 0x10ffff
        e = iter(ranges)
        n = next(e,None)
        if n is None:
            yield FARange(0,0x10ffff)
            return
        if n.min > 0:
            yield FARange(0,n.min - 1)
            last = n.max
            if 0x10ffff <= last:
                return
        elif n.min == 0:
            last = n.max
            if 0x10ffff <= last:
                return
        n = next(e, None)
        while not (n is None):
            if (0x10ffff <= last):
                return
            if last + 1 < n.min:
                yield FARange(last+1,n.min-1)

            last = n.max
            n = next(e, None)
        if 0x10ffff > last:
            yield FARange(last+1,0x10ffff)

    @staticmethod
    def __toNotRanges(ranges):
        if ranges is None:
            return
        # expects ranges to be normalized
        last = 0x10ffff
        if hasattr(ranges,'__iter__'):
            e = iter(ranges)
        else:
            e = ranges
        cur = next(e, None)
        if cur is None:
            yield FARange(0x0, 0x10ffff)
            return
        if cur.min > 0:
            yield FARange(0, cur.min - 1)
            last = cur.max
            if  0x10ffff <= last:
                return
        elif cur.min == 0:
            last = cur.max
            if 0x10ffff <= last:
                return
        while not (cur is None):
            if 0x10ffff <= last:
                return
            if last + 1 < cur.min:
                yield FARange(last + 1, (cur.min - 1))
            last = cur.max
            cur = next(e, None)

        if 0x10ffff >= last:
            yield FARange((last + 1), 0x10ffff)

    @staticmethod
    def __parse(pc, accept = 0, compact = True):
        result = None
        nextExpr = None
        ich = 0
        pc.ensureStarted()
        while pc.codepoint != -1:
            match chr(pc.codepoint):
                #case -1:
                #    if result is None:
                #        # empty string
                #        result = FA(accept)
                #    return result
                case ".":
                    dot = FA.charset([FARange(0, 0x10ffff)], accept, compact)

                    pc.advance()

                    dot = FA.__parseModifier(dot, pc, accept, compact)
                    if result is None:
                        result = dot
                    else:
                        result = FA.concat([result, dot], accept, compact)

                case "\\":
                    pc.advance()
                    pc.expecting([])
                    isNot = False
                    ch = chr(pc.codepoint)
                    match ch:
                        case ch if ch in ["P", "p"]:
                            if pc.codepoint==ord("P"):
                                isNot = True
                            pc.advance()
                            pc.expecting(['{'])
                            uc = ""
                            while -1 != pc.advance() and ord("}")!=pc.codepoint:
                                uc += chr(pc.codepoint)
                            pc.expecting(['}'])
                            pc.advance()
                            uci = 0
                            match uc:
                                case "Pe":
                                    uci = 21
                                case "Pc":
                                    uci = 19
                                case "Cc":
                                    uci = 14
                                case "Sc":
                                    uci = 26
                                case "Pd":
                                    uci = 19
                                case "Nd":
                                    uci = 8
                                case "Me":
                                    uci = 7
                                case "Pf":
                                    uci = 23
                                case "Cf":
                                    uci = 15
                                case "Pi":
                                    uci = 22
                                case "Nl":
                                    uci = 9
                                case "Zl":
                                    uci = 12
                                case "Ll":
                                    uci = 1
                                case "Sm":
                                    uci = 25
                                case "Lm":
                                    uci = 3
                                case "Sk":
                                    uci = 27
                                case "Mn":
                                    uci = 5
                                case "Ps":
                                    uci = 20
                                case "Lo":
                                    uci = 4
                                case "Cn":
                                    uci = 29
                                case "No":
                                    uci = 10
                                case "Po":
                                    uci = 24
                                case "So":
                                    uci = 28
                                case "Zp":
                                    uci = 13
                                case "Co":
                                    uci = 17
                                case "Zs":
                                    uci = 11
                                case "Mc":
                                    uci = 6
                                case "Cs":
                                    uci = 16
                                case "Lt":
                                    uci = 2
                                case "Lu":
                                    uci = 0

                            if isNot:
                                nextExpr = FA.charset(FARange.toUnpacked(FACharacterClasses.unicodeCategories[uci]), accept, compact)
                            else:
                                nextExpr = FA.charset(FARange.toUnpacked(FACharacterClasses.notUnicodeCategories[uci]), accept, compact)
                        case "d":
                            nextExpr = FA.charset(FARange.toUnpacked(FACharacterClasses.digit), accept, compact)
                            pc.advance()
                        case "D":
                            nextExpr = FA.charset(FA.__toNotRanges(FARange.toUnpacked(FACharacterClasses.digit)), accept, compact)
                            pc.advance()
                        case "s":
                            nextExpr = FA.charset(FARange.toUnpacked(FACharacterClasses.space), accept, compact)
                            pc.advance()
                        case "S":
                            nextExpr = FA.charset(FA.__toNotRanges(FARange.toUnpacked(FACharacterClasses.space)), accept, compact)
                            pc.advance()
                        case "w":
                            nextExpr = FA.charset(FARange.toUnpacked(FACharacterClasses.word), accept, compact);
                            pc.advance()
                        case "W":
                            nextExpr = FA.charset(FA.__toNotRanges(FARange.toUnpacked(FACharacterClasses.word)), accept, compact);
                            pc.advance()
                        case _:
                            ich = FA.__parseEscapePart(pc)
                            if -1 != ich:
                                nextExpr = FA.literal([ich], accept, compact)
                            else:
                                pc.expecting([]) # throw an error

                    nextExpr = FA.__parseModifier(nextExpr, pc, accept, compact)
                    if not (result is None):
                        result = FA.concat([result, nextExpr], accept, compact)
                    else:
                        result = nextExpr
                case ")":
                    return result
                case "(":
                    pc.advance()
                    if pc.codepoint == ord("?"):
                        pc.advance()
                        pc.expecting([':'])
                        pc.advance()

                    pc.expecting([])
                    nextExpr = FA.__parse(pc, accept, compact)
                    pc.expecting([')'])
                    if -1 == pc.advance():
                        if result is None:
                            return nextExpr
                        else:
                            return FA.concat([result, nextExpr], accept, compact)

                    nextExpr = FA.__parseModifier(nextExpr, pc, accept, compact)
                    if result is None:
                        result = nextExpr
                    else:
                        result = FA.concat([result, nextExpr], accept, compact)
                case "|":
                    if -1 != pc.advance():
                        nextExpr = FA.__parse(pc, accept, compact)
                        result = FA.union([result, nextExpr], accept, compact)
                    else:
                        result = FA.optional(result, accept, compact)
                case "[":
                    seti = FA.__parseSet(pc)
                    sortset = seti[1]
                    sortset.sort()
                    FA.__normalizeSortedRangeList(sortset)
                    if seti[0] == True:
                        sortset = list(FA.__toNotRanges(sortset))
                    nextExpr = FA.charset(sortset, accept)
                    nextExpr = FA.__parseModifier(nextExpr, pc, accept, compact)
                    if result is None:
                        result = nextExpr
                    else:
                        result = FA.concat([result, nextExpr], accept, compact)
                case _:
                    ich = pc.codepoint
                    nextExpr = FA.literal([ich], accept, compact)
                    pc.advance()
                    nextExpr = FA.__parseModifier(nextExpr, pc, accept, compact)
                    if result is None:
                        result = nextExpr
                    else:
                        result = FA.concat([result, nextExpr], accept, compact)
        return result
    @staticmethod
    def parse(expression: str, accept: int = 0, compact: bool = True) -> FA:
        pc = _ParseContext(expression)
        result = FA.__parse(pc, accept, compact)
        if result is None:
            result = FA(accept)
        return result
    @staticmethod
    def acceptingFilter(state: FA) -> bool:
        return state.acceptSymbol != -1
    @staticmethod
    def finalFilter(state: FA) -> bool:
        return state.isFinal()
    @staticmethod
    def neutralFilter(state: FA) -> bool:
        return state.isNeutral()
    @staticmethod
    def trapFilter(state: FA) -> bool:
        return state.isTrap()

    def fillFind(self, predicate: Callable[[FA], bool], result: list[FA] = None) -> list[FA]:
        if result is None:
            result = []
        if self in result:
            return result
        if predicate(self) == True:
            result.append(self)
        for trn in self.transitions:
            trn.to.fillFind(predicate, result)
        return result

    def findFirst(self, predicate: Callable[[FA], bool]) -> FA | None:
        if predicate(self) == True:
            return self
        for trn in self.transitions:
            result = trn.to.findFirst(predicate)
            if not (result is None):
                return result
        return None
    @staticmethod
    def __appendCodepointTo(codepoint, result):
        ch = chr(codepoint)
        match ch:
            case '.'|'['|']'|'^'|'-'|'+'|'?'|'('|')'|'\\':
                result += '\\'
                result += ch
                return;
            case '\t':
                result += "\\t"
            case '\n':
                result += "\\n"
            case '\r':
                result += "\\r"
            case '\0':
                result +="\\0"
            case '\f':
                result += "\\f"
            case '\v':
                result += "\\v"
            case '\b':
                result += "\\b"
            case _:
                if codepoint<32 or codepoint>127:
                    if len(ch) == 1:
                        result += "\\u"
                        result += format(codepoint,'04x')
                    else:
                        result += "\\U"
                        result += format(codepoint,'08x')
                else:
                    result += ch
        return result

    @staticmethod
    def __appendRangeCodepointTo(codepoint, result):
        ch = chr(codepoint)
        match ch:
            case '.'|'['|']'|'^'|'-'|'('|')'|'{'|'}'|'\\':
                result += '\\'
                result += ch
            case '\t':
                result += "\\t"
            case '\n':
                result += "\\n"
            case '\r':
                result += "\\r"
            case '\0':
                result +="\\0"
            case '\f':
                result += "\\f"
            case '\v':
                result += "\\v"
            case '\b':
                result += "\\b"
            case _:
                if codepoint<32 or codepoint>127:
                    if len(ch) == 1:
                        result += "\\u"
                        result += format(codepoint,'04x')
                    else:
                        result += "\\U"
                        result += format(codepoint,'08x')
                else:
                    result += ch
        return result

    @staticmethod
    def __appendRangeTo(ranges, index, result):
        first = ranges[index].min
        last = ranges[index].max
        if first == 0 and last == 1114111:
            return result + '.'
        result = FA.__appendRangeCodepointTo(first, result)
        if last == first:
            return result

        if last == first + 1: # spit out 1 and 2 length ranges as flat chars
            result = FA.__appendRangeCodepointTo(last, result)
            return result
        elif last == first + 2:
            result = FA.__appendRangeCodepointTo(first+1, result)
            result = FA.__appendRangeCodepointTo(last, result)
            return result
        result +='-'
        result = FA.__appendRangeCodepointTo(last, result)
        return result

    @staticmethod
    def __appendRangesTo(ranges, result):
        i = 0
        while i < len(ranges):
            result = FA.__appendRangeTo(ranges,i,result)
            i+= 1
        return result

    @staticmethod
    def __toExprFillEdgesIn(edges, fa, result):
        for edge in edges:
            if edge.toState is fa:
                result.append(edge)
    @staticmethod
    def __toExprFillEdgesOut(edges, fa, result):
        for edge in edges:
            if edge.fromState is fa:
                result.append(edge)
    @staticmethod
    def __toExpressionFillEdgesOrphanState(edges, fa, result):
        for edge in edges:
            if ((edge.fromState is fa) or (edge.toState is fa)):
                continue
            result.append(edge)
    @staticmethod
    def __toExpressionOrJoin(strings):
        if len(strings) == 0:
            return ""
        if len(strings) == 1:
            return strings[0]
        return "("+"|".join(strings)+")"

    @staticmethod
    def __toExpressionKleeneStar(s, noWrap,result):
        if s is None or len(s) == 0:
            return result
        if noWrap == True or len(s) == 1:
            result += f"{s}*"
            return result
        result += f"({s})*"
        return result

    @staticmethod
    def __toExpression(fa: FA) -> str:
        closure: list[FA] = []
        fsmEdges: list[_ExpEdge] = []
        first: FA = fa
        final: FA = None
        # linearize the state machine
        acc: list[FA] = []
        for f in first.fillClosure():
            if f.acceptSymbol != -1:
                acc.append(f)
        if len(acc) == 1:
            final = acc[0]
        elif len(acc) > 1:
            fa = fa.clone()
            first = fa
            acc.clear()
            for f in fa.fillClosure():
                if f.acceptSymbol != -1:
                    acc.append(f)

            final = FA(acc[0].acceptSymbol)
            for a in acc:
                a.addEpsilon(final, False)
                a.acceptSymbol = -1
        first.fillClosure(closure)
        sb = [] # string builder
        # build the machine from the FA
        trnsgrp = dict()
        for cfa in closure:
            trnsgrp.clear()
            for trns in cfa.fillInputTransitionRangesGroupedByState(True,trnsgrp).items():
                sb = ""
                if len(trns[1]) == 1 and trns[1][0].min == trns[1][0].max:
                    rng = trns[1][0]
                    if rng.min == -1 or rng.max == -1:
                        eedge = _ExpEdge()
                        eedge.exp = ""
                        eedge.fromState = cfa
                        eedge.toState = trns[0]
                        fsmEdges.append(eedge)
                        continue
                    sb = FA.__appendCodepointTo(rng.min, sb)
                else:
                    sb += "["
                    sb = FA.__appendRangesTo(trns[1], sb)
                    sb += "]"

                edge = _ExpEdge()
                edge.exp = sb
                edge.fromState = cfa
                edge.toState = trns[0]
                fsmEdges.append(edge)
        tmp = FA()
        tmp.addEpsilon(first, False)
        q0 = first
        first = tmp
        tmp = FA(final.acceptSymbol)
        qLast = final
        final.acceptSymbol = -1
        final.addEpsilon(tmp, False)
        final = tmp
        # add first and final
        newEdge = _ExpEdge()
        newEdge.exp = ""
        newEdge.fromState = first
        newEdge.toState = q0
        fsmEdges.append(newEdge)
        newEdge = _ExpEdge()
        newEdge.exp = ""
        newEdge.fromState = qLast
        newEdge.toState = final
        fsmEdges.append(newEdge)
        closure[0].setIds()
        closure.insert(0, first)
        closure.append(final)
        inEdges: list[_ExpEdge] = []
        outEdges: list[_ExpEdge] = []
        while len(closure) > 2:
            node = closure[1]
            loops = []
            inEdges.clear()
            FA.__toExprFillEdgesIn(fsmEdges, node, inEdges)
            for edge in inEdges:
                if edge.fromState is edge.toState:
                    loops.append(edge.exp)
            sb = ""
            sb = FA.__toExpressionKleeneStar(FA.__toExpressionOrJoin( loops),len(loops) > 1, sb)
            middle = sb
            for inEdge in inEdges:
                if inEdge.fromState is inEdge.toState:
                    continue
                outEdges.clear()
                FA.__toExprFillEdgesOut(fsmEdges, node, outEdges)
                for outEdge in outEdges:
                    if outEdge.fromState is outEdge.toState:
                        continue
                    expEdge = _ExpEdge()
                    expEdge.fromState = inEdge.fromState
                    expEdge.toState = outEdge.toState
                    sb = ""
                    sb += inEdge.exp
                    sb += middle
                    sb += outEdge.exp
                    expEdge.exp = sb
                    fsmEdges.append(expEdge)
            # reuse inedges since we're not using it
            inEdges.clear()
            FA.__toExpressionFillEdgesOrphanState(fsmEdges, node,inEdges)
            fsmEdges.clear()
            for edge in inEdges:
                fsmEdges.append(edge)
            closure.remove(node)
        sb = ""
        if len(fsmEdges) == 1:
            return fsmEdges[0].exp

        if len(fsmEdges) > 1:
            sb+="("
            sb+=fsmEdges[0].exp
            i = 1
            while i < len(fsmEdges):
                sb += "|"
                edge = fsmEdges[i]
                sb += edge.exp
                i += 1
            sb += ")"

        return sb
    def toString(self, format: str | None = "") -> str:
        if (format is None) or len(format) == 0:
            return str(self)
        if format == "e":
            return FA.__toExpression(self)
        raise Exception("Invalid format specifier")

    @staticmethod
    def __escapeLabel(label: str):
        if (label is None) or len(label) == 0:
            return label
        result = label.replace("\\", "\\\\")
        result = result.replace("\"", "\\\"")
        result = result.replace("\n", "\\n")
        result = result.replace("\r", "\\r")
        result = result.replace("\0", "\\0")
        result = result.replace("\v", "\\v")
        result = result.replace("\t", "\\t")
        result = result.replace("\f", "\\f")
        return result

    @staticmethod
    def __writeCompoundDotTo(closure: list[FA], writer: list[str], options: FADotGraphOptions | None = None) -> None:
        writer.append("digraph FA {\n")
        vert: bool = True
        if options is None or options.vertical == False:
            writer.append("rankdir=LR\n")
            vert = False

        writer.append("node [shape=circle]\n")
        opt2 = FADotGraphOptions()
        opt2.debugSourceNfa = None
        if options is None:
            options = FADotGraphOptions()
        opt2.statePrefix = options.statePrefix
        opt2.debugString = options.debugString
        opt2.debugShowNfa = False
        opt2.dpi = options.dpi
        opt2.acceptSymbolNames = options.acceptSymbolNames
        opt2.hideAcceptSymbolIds = options.hideAcceptSymbolIds
        opt2.blockEnds = options.blockEnds
        if vert == False:
            FA.__writeDotTo(closure, writer, options, 2)
            FA.__writeDotTo(options.debugSourceNfa.fillClosure(), writer, opt2, 1)
        else:
            FA.__writeDotTo(options.debugSourceNfa.fillClosure(), writer, opt2, 2)
            FA.__writeDotTo(closure, writer, options, 1)
        writer.append("}\n")

    @staticmethod
    def __writeDotTo(closure : list[FA], writer : list[str], options : FADotGraphOptions|None = None, clusterIndex : int = -1) -> None:
        if options is None:
            options = FADotGraphOptions()
        hasBlockEnds = options.debugShowNfa == False and (options.debugString is None) and (not (options.blockEnds is None))
        spfx = "q"
        if not (options.statePrefix is None) and len(options.statePrefix)>0:
            spfx = options.statePrefix
        pfx = ""
        if clusterIndex != -1:
            writer.append(f"subgraph cluster_{clusterIndex} {{\n")
            pfx = f"c{clusterIndex}"
        else:
            writer.append("digraph FA {\n")
        if options.vertical == False:
            writer.append("rankdir=LR\n");

        writer.append("node [shape=circle]\n")
        accepting: list[FA] = []
        finals: list[FA] = []
        neutrals: list[FA] = []
        for ffa in closure:
            if ffa.acceptSymbol != -1:
                accepting.append(ffa)
            elif ffa.isNeutral():
                neutrals.append(ffa)
            elif ffa.isFinal():
                finals.append(ffa)

        fromStates: list[FA] | None = None
        toStates: list[FA] | None = None

        tchar: int = -1
        if not (options.debugString is None):
            for ch in FA.toUtf32(options.debugString):
                if fromStates is None:
                    fromStates = closure[0].fillEpsilonClosure()
                else:
                    fromStates = toStates
                tchar = ch
                toStates = FA.fillMove(fromStates, ch)
                if len(toStates) == 0:
                    break
        if fromStates is None:
            fromStates = closure[0].fillEpsilonClosure(fromStates)

        if not (toStates is None):
            toStates = FA.fillEpsilonClosureAll(toStates)
        else:
            toStates = fromStates
        i = 0
        for ffa in closure:
            isfrom = (not (fromStates is None)) and ffa in FA.fillEpsilonClosureAll(fromStates)
            rngGrps = ffa.fillInputTransitionRangesGroupedByState()
            for rngGrp in rngGrps.items():
                istrns = isfrom and (not (toStates is None)) and (not (options.debugString is None)) and rngGrp[0] in toStates
                di = closure.index(rngGrp[0])
                writer.append(f"{pfx}{spfx}{i}->{pfx}{spfx}{di} [label=\"")
                sb = ""
                notRanges = list(FA.__invertRanges(rngGrp[1]))
                if len(notRanges)*1.5 > len(rngGrp[1]):
                    sb = FA.__appendRangesTo(rngGrp[1],sb)
                else:
                    if len(notRanges) == 0:
                        sb+="."
                    else:
                        sb+="^"
                        sb = FA.__appendRangesTo(notRanges,sb)

                if len(sb) != 1 or sb == " ":
                    writer.append('[')
                    if len(sb) > 16:
                        sb = sb[0:16]
                        sb += "..."

                    writer.append(f"{FA.__escapeLabel(sb)}]")
                else:
                    writer.append(FA.__escapeLabel(sb))
                if istrns == False:
                    writer.append("\"]")
                else:
                    writer.append("\",color=green]\n")

            # do epsilons
            for fat in ffa.transitions:
                if fat.isEpsilon() == True:
                    istrns = (not (toStates is None)) and (not (options.debugString is None)) and (ffa in toStates) and fat.to in toStates
                    col = "gray"
                    if istrns == True:
                        col = "green"
                    writer.append(f"{pfx}{spfx}{i}->{pfx}{spfx}{closure.index(fat.to)} [style=dashed,color={col}]\n")

            # do block ends
            bel = 0
            if not (options.blockEnds is None):
                bel = len(options.blockEnds)
            if hasBlockEnds == True and ffa.acceptSymbol != -1 and bel > ffa.acceptSymbol and (not (options.blockEnds[ffa.acceptSymbol] is None)):
                writer.append(f"{pfx}{spfx}{i}->{pfx}blockEnd{ffa.acceptSymbol}{spfx}0 [style=dashed,label=\".*?\"]\n")

            i += 1

        delim = ""
        if hasBlockEnds == True:
            bel = 0
            if not (options.blockEnds is None):
                bel = len(options.blockEnds)
            i = 0
            while i < bel:
                bfa = options.blockEnds[i]
                if not (bfa is None):
                    bclose = bfa.fillClosure()
                    delim = ""
                    qi = 0
                    while qi < len(bclose):
                        cbfa = bclose[qi]
                        rngGrps = cbfa.fillInputTransitionRangesGroupedByState(True)
                        for rngGrp in rngGrps.items():
                            di = bclose.index(rngGrp[0])
                            writer.append(f"{pfx}blockEnd{i}{spfx}{qi}->{pfx}blockEnd{i}{spfx}{di} [label=\"")
                            sb = FA.__appendRangesTo(rngGrp[1],sb)
                            if len(sb) != 1 or sb == " ":
                                middle = ""
                                if len(sb) > 16:
                                    sb = sb[:16]
                                    middle = "..."
                                writer.append(f"[{FA.__escapeLabel(sb)}{middle}]\"]\n")
                            else:
                                writer.append(f"{FA.__escapeLabel(sb)}\"]\n")

                        # do epsilons
                        for fat in cbfa.transitions:
                            if fat.isEpsilon() == True:
                                di = bclose.index(fat.to);
                                writer.append(f"{pfx}blockEnd{i}{spfx}{qi}->{pfx}blockEnd{i}{spfx}{di} [style=dashed,color=gray]\n")
                        qi += 1
                    qi = 0
                    while qi < len(bclose):
                        cbfa = bclose[qi]
                        writer.append(f"{pfx}blockEnd{i}{spfx}{qi} [label=<<TABLE BORDER=\"0\"><TR><TD>(be){spfx}<SUB>{qi}</SUB></TD></TR>")
                        if cbfa.acceptSymbol!=-1 and options.hideAcceptSymbolIds == False:
                            acc = None
                            if (not (options.acceptSymbolNames is None)) and len(options.acceptSymbolNames) > i:
                                acc = options.acceptSymbolNames[i]
                            if acc is None:
                                acc = str(i)
                            acc2 = acc.replace("\"", "&quot;")
                            writer.append(f"<TR><TD>{acc2}</TD></TR>")

                        writer.append("</TABLE>>")
                        if cbfa.acceptSymbol != -1:
                            writer.append(",shape=doublecircle")
                        elif cbfa.isFinal() == True or cbfa.isNeutral() == True:
                            writer.append(",color=gray")
                        writer.append("]\n")
                        qi += 1
                i += 1
        delim = ""
        i = 0
        for ffa in closure:
            col = ""
            if not (options.debugString is None):
                if not (toStates is None):
                    epstates= FA.fillEpsilonClosureAll(toStates)
                    if  ffa in epstates:
                        col = "color=green,"
                    elif ffa in epstates and (not (ffa in toStates)):
                        col = "color=darkgreen,"
                else:
                    col = "color=darkgreen,"

            writer.append(f"{pfx}{spfx}{i} [{col}label=<<TABLE BORDER=\"0\"><TR><TD>{spfx}<SUB>{i}</SUB></TD></TR>")

            if not (options.debugSourceNfa is None):
                fromStates = ffa.__fromStates
                if not (fromStates is None):
                    brk =  int(math.floor(math.sqrt(len(fromStates))))
                    if len(fromStates) <= 3:
                        brk = 3
                    j = 0
                    while j < len(fromStates):
                        if j == 0:
                            writer.append("<TR><TD>{ ")
                            delim = ""
                        elif (j % brk) == 0:
                            delim = ""
                            writer.append("</TD></TR><TR><TD>")
                        fromFA = fromStates[j]
                        writer.append(f"{delim}q<SUB>{options.debugSourceNfa.fillClosure().index(fromFA)}</SUB>")
                        # putting a comma here is what we'd like
                        # but it breaks dot no matter how its encoded
                        delim = " "
                        if j==len(fromStates)-1:
                            writer.append(" }</TD></TR>")
                        j += 1

            bel = 0
            if not (options.blockEnds is None):
                bel = len(options.blockEnds)
            if ffa.acceptSymbol != -1 and options.hideAcceptSymbolIds == False and not (hasBlockEnds == True and bel > ffa.acceptSymbol and (not (options.blockEnds[ffa.acceptSymbol] is None))):
                acc = None
                if not (options.acceptSymbolNames is None) and len(options.acceptSymbolNames) > ffa.acceptSymbol:
                    acc = options.acceptSymbolNames[ffa.acceptSymbol]
                if acc is None:
                    acc = str(ffa.acceptSymbol)
                acc2 = acc.replace("\"", "&quot;")
                writer.append(f"<TR><TD>{acc2}</TD></TR>")

            writer.append("</TABLE>>")
            isfinal = False

            if (ffa in accepting) and ((hasBlockEnds == False or len(bel) <= ffa.acceptSymbol or (options.blockEnds[ffa.acceptSymbol] is None))):
                writer.append(",shape=doublecircle")
            elif isfinal == True or ffa in neutrals:
                if ((fromStates is None) or not (ffa in fromStates)) and ((toStates is None) or not (ffa in toStates)):
                    writer.append(",color=gray")

            writer.append("]\n")
            i+=1

        delim = ""
        if len(accepting) > 0:
            for ntfa in accepting:
                if hasBlockEnds == False or bel <= ntfa.acceptSymbol or (options.blockEnds is None) or (options.blockEnds[ntfa.acceptSymbol] is None):
                    writer.append(f"{delim}{pfx}{spfx}{closure.index(ntfa)}")
                    delim = ","
            if delim != "":
                writer.append(" [shape=doublecircle]\n")

        delim = ""
        if len(neutrals) > 0:
            for ntfa in neutrals:
                if ((fromStates is None) or (not (ntfa in fromStates))) and ((toStates is None) or (not (ntfa in toStates))):
                    writer.append(f"{delim}{pfx}{spfx}{closure.index(ntfa)}")
                    delim = ","

            writer.append(" [color=gray]\n")
            delim = ""

        delim = ""
        if len(finals) > 0:
            for ntfa in finals:
                writer.append(f"{delim}{pfx}{spfx}{closure.index(ntfa)}")
                delim = ","
            writer.append(" [shape=circle,color=gray]\n")

        writer.append("}\n")

    def __writeDot(self, writer: list[str], options: FADotGraphOptions | None = None) -> None:
        if (not (options is None)) and (not (options.debugSourceNfa is None)) and options.debugShowNfa == True:
            FA.__writeCompoundDotTo(self.fillClosure(), writer, options)
        else:
            FA.__writeDotTo(self.fillClosure(), writer, options)

    def renderToFile(self, path: str, options : FADotGraphOptions | None = None) -> None:
        if options is None:
            options = FADotGraphOptions()
        cmd: list[str] = ["dot"]
        arg: str = "-T"
        ext: str = os.path.splitext(path)[1].lower()
        dotwriter: list[str] = []
        match ext:
            case ".dot":
                self.__writeDot(dotwriter, options)
                output = open(path,"w")
                output.write("".join(dotwriter))
                output.close()
                return
            case ".png":
                arg += "png"
            case ".jpg"|".jpeg":
                arg += "jpg"
            case ".bmp":
                arg += "bmp"
            case ".svg":
                arg += "svg"
            case _:
                raise Exception("Unsupported output format")
        cmd.append(arg)
        if options.dpi > 0:
            cmd.append(f"-Gdpi={options.dpi}")
        self.__writeDot(dotwriter, options)
        cmd.append(f"-o{path}")
        p = Popen(cmd, stdout=None, stdin=PIPE, stderr=None, text=True)
        p.communicate(input="".join(dotwriter))[0]


#fa = FA.parse("foo\\/(.*)",0)
# nfa = FA.parse("[A-Z_a-z][0-9A-Z_a-z]*",0,False)
# opts = FADotGraphOptions()
# opts.hideAcceptSymbolIds = True
# opts.debugShowNfa = True
# opts.debugSourceNfa = nfa
# opts.dpi = 300
# opts.vertical = False
# nfa.toDfa().renderToFile("test.jpg",opts)

# compact = False
# firstPart = FA.charset([FARange(ord("A"),ord("Z")),FARange(ord("a"),ord("z")),FARange(ord("_"),ord("_"))],0,compact)
# nextPart = FA.charset([FARange(ord("A"),ord("Z")),FARange(ord("a"),ord("z")),FARange(ord("_"),ord("_")),FARange(ord("0"),ord("9"))],0,compact)
# ident = FA.concat([firstPart,FA.repeat(nextPart,0,0,0,compact)],0,compact)
# print(ident.toArray())
# print(ident.toDfa().toArray())
# print(ident.toMinimizedDfa().toArray())
# print("PARSING ---------------")
# ident = FA.parse("[A-Za-z_][A-Za-z_0-9]*",0,compact)
# print(ident.toArray())
# print(ident.toDfa().toArray())
# print(ident.toMinimizedDfa().toArray())