Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re, sys
- re1 = re.compile(rb"(([^=]|\\=)*)(=([^=]|\\=)*)?")
- import pprint
- def unescape(i):
- ret = b""
- esc = 0
- for c in i:
- if esc and c == b"\\":
- ret+= b"\\"
- esc = 0
- elif esc and c == b"=":
- ret+= b"="
- esc = 0
- elif esc and c == b"n":
- ret+= b"\n"
- esc = 0
- elif esc:
- raise ValueError(f"Parser error: got character {c} after \\")
- elif c == b"\\":
- esc = 1
- else:
- ret+= bytes([c])
- if esc:
- raise ValueError(f"Parser error: got EOL/EOF after \\")
- return ret
- def parse(i):
- terminators = set()
- rules = set()
- lines = i.split(b"\n")
- for l in lines:
- m = re1.match(l)
- if m.group(3):
- rules|= {(unescape(m.group(1)), unescape(m.group(3)[1:]))}
- else:
- terminators|= {unescape(m.group(1))}
- return terminators, rules
- def compile_f(c):
- pp = pprint.PrettyPrinter(indent=4)
- # Group based on the cardinality of elements in a multiset
- # Track information about order, but do not use it for grouping
- terms, rules = parse(c)
- states = {}
- for t in terms:
- k,s = lit_symm(t)
- if k in states:
- states[k] = merge_symm(s, states[k])
- else:
- states[k] = s
- while 1:
- #print("len states:", len(states))
- #pp.pprint(states)
- for k in states:
- v = states[k]
- #xprint(k,v)
- new_states = {}
- for chars in states:
- for a,b in rules:
- if charcounts_comp(b, chars):
- k,v = trans_symm(chars, states[chars], a, b)
- #print("\tRule",a,"->",b," got k:",k,", v:",v)
- if k in new_states:
- new_states[k] = merge_symm(new_states[k], v)
- elif k in states:
- new_states[k] = merge_symm(states[k], v)
- else:
- new_states[k] = v
- ns = states | new_states
- if states == ns:break
- states = ns
- #print("EWAFDSDFG")
- #print(states)
- # Generate code
- prog = """
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
- struct WordNode {
- unsigned char* ptr;
- size_t len;
- struct WordNode* next;
- };
- """
- statelist = [(k, states[k])for k in states]
- # Generate input-match-and-verify for every state
- for idx,kv in enumerate(statelist):
- chrs,symm = kv
- symmc = {*symm}
- ordchrs = {e for t in symm for e in t}
- for a in ordchrs:
- for b in ordchrs:
- for c in ordchrs:
- if (a,c) in symmc and (a,b) in symm and (b,c) in symm:
- symmc-= {(a,c)}
- prog+= f"""
- int match_{idx}(unsigned char* ptr, size_t len) {{
- size_t count_dummy = 0;
- {"".join([f"size_t count_{ch} = 0;" for ch,_ in chrs])}
- for (size_t i = 0; i < len;++i) {{
- {"".join([f"if (ptr[i] == {ch}) {{if (count_{next((y for x,y in symmc if x == ch), 'dummy')})return 0;if (count_{ch} == 0) {{{''.join([f'count_{sm}+= count_{sm} ? 0 : 1;' for sm in ordchrs if (sm,ch) in symm])}count_{ch} = 1;}}}}" for ch in ordchrs])}
- {"else ".join(["if(0)count_dummy=0;"]+[f"if (ptr[i] == {ch})count_{ch}+= 1;" for ch,_ in chrs])}else return 0;
- }}
- {"".join([f"count_{ch}-=count_{ch}?1:0;" for ch in ordchrs])}
- {"".join([f"if (count_{ch}{['?','?','!=1','>1','<2','==1','<1','<0'][amt]})return 0;" for ch,amt in chrs])}
- return 1;
- }}
- """
- prog+= f"""
- int match(unsigned char* ptr, size_t len) {{
- {"".join([f"if (match_{idx}(ptr, len))return 1;" for idx in range(len(statelist))])}
- return 0;
- }}
- int terminator(unsigned char* ptr, size_t len) {{
- {"".join([f"unsigned char arr_{idx}[{len(t)}]={{{str([*t])[1:-1]}}};if(len == {len(t)} && 0==memcmp(arr_{idx},ptr,len))return 1;" for idx,t in enumerate(terms)])}
- return 0;
- }}
- void infloop(){{
- while (1){{}}
- }}
- void mainloop(unsigned char* input, size_t len) {{
- if (!match(input, len)){{free(input);infloop();}};
- if (terminator(input, len)){{
- fwrite(input, 1, len, stdout);
- exit(0);
- }}
- struct WordNode startval = {{
- input,
- len,
- NULL,
- }};
- struct WordNode* start = malloc(sizeof(struct WordNode));
- if (start == NULL)exit(-1);
- *start = startval;
- struct WordNode* end = start;
- while (1) {{
- if (start == NULL)continue;
- """
- for a,b in rules:
- prog+= f"""
- if (start->len + {len(b)} >= {len(a)}) {{
- unsigned char a[{len(a)}] = {{{str([*a])[1:-1]}}};
- unsigned char b[{len(b)}] = {{{str([*b])[1:-1]}}};
- size_t totlen = start->len + {len(b)} - {len(a)};
- unsigned char arr[totlen ? totlen : 1];
- for (size_t i = 0; i + {len(a)} < len + 1;++i) {{
- size_t j = 0;
- for (;j < {len(a)}; ++j) {{
- if (a[j] != start->ptr[i+j])break;
- }}
- if (j == {len(a)}) {{
- for(size_t k = 0;k < {len(b)};k++) {{
- arr[i+k] = b[k];
- }}
- for(size_t p = i + {len(a)};p < start->len;p++) {{
- arr[p+{len(b)}-{len(a)}] = start->ptr[p];
- }}
- if (match(arr, totlen)) {{
- if (terminator(arr, totlen)) {{
- fwrite(arr, 1, totlen, stdout);
- exit(0);
- }} else {{
- unsigned char* copy = malloc(totlen ? totlen : 1);
- if (copy == NULL)exit(-1);
- memcpy(copy, arr, totlen);
- struct WordNode* next = malloc(sizeof(struct WordNode));
- if (next == NULL)exit(-1);
- struct WordNode nextdata = {{
- copy,
- totlen,
- NULL,
- }};
- *next = nextdata;
- end->next = next;
- end = next;
- }}
- }}
- }}
- arr[i] = start->ptr[i];
- }}
- }}
- """
- prog+= """
- struct WordNode* tmp = start;
- start = start->next;
- free(tmp->ptr);
- free(tmp);
- }
- }
- // Read stdin and write size to i
- char* getStdin(size_t* i)
- {
- int c;
- *i = 0;
- char* ptrBuff = malloc(1);
- while ((c = getchar()) && c != EOF)
- {
- if ((ptrBuff = (char*)realloc(ptrBuff, sizeof (char)+*i)) != NULL)
- ptrBuff[(*i)++] = c;
- else
- {
- free(ptrBuff);
- return NULL;
- }
- }
- if (ptrBuff != NULL)
- ptrBuff[*i] = '\0';
- return ptrBuff;
- }
- int main() {
- size_t len;
- char* ptr = getStdin(&len);
- mainloop(ptr, len);
- return -2;
- }
- """
- with open("osht.c", "w") as f:
- f.write(prog)
- def charcounts(lit):
- counts = {}
- for ch in lit:
- counts[ch] = ch in counts and counts[ch] + 1 or 1
- return frozenset((ch,2**min(counts[ch],2))for ch in lit)
- def lit_symm(lit):
- s = {(a,b) for a in lit for b in lit if a != b}
- for j in range(len(lit)):
- for i in range(j):
- b = lit[i]
- a = lit[j]
- s-= {(a,b)}
- return charcounts(lit), s
- def charcounts_comp(substring, counts):
- subc = charcounts(substring)
- lut = {k:v for k,v in counts}
- return next((0 for ch,amt in subc if ch not in lut or lut[ch]<amt),1)
- def merge_symm(a,b):
- return a&b
- def trans_symm(chrs, symm, a, b):
- counts = {}
- for ch in a:
- counts[ch] = counts[ch] + 1 if ch in counts else 1
- for ch in b:
- counts[ch] = counts[ch] - 1 if ch in counts else -1
- chrs_dict = {k:v for k,v in chrs}
- a_set,a_symm = lit_symm(a)
- a_dict = {k:v for k,v in a_set}
- tot = a_dict | chrs_dict
- ret_dict = {}
- #print(f"\t\tDbg: tot:{tot}, b_dict:{a_dict}")
- for ch in tot:
- amt = ch in chrs_dict and chrs_dict[ch] or 1
- diff = ch in counts and counts[ch]
- #print(f"\t\t\tTrying {chr(ch)}, amt: {amt}, diff: {diff}")
- ret = 0
- if amt & 4:
- ret|= (diff < -1) | 2*(diff < 0) | 4
- if diff == 1:
- ret|= amt << 1
- elif diff > 1:
- ret|= 4
- else:
- ret|= amt >> -diff
- ret = ret & 7
- if ret > 1:
- ret_dict[ch] = ret
- # Calculate the symms
- new_symm = set()
- for x,y in symm:
- #print(f"\t\t\tTrying {chr(x)}{chr(y)}")
- # These symbols will not be part of the output, so skip them
- if x not in ret_dict or y not in ret_dict:
- #print(f"\t\t\t Skipped because {x not in ret_dict} or {y not in ret_dict}")
- continue
- # No way it affects the order
- if x not in b and y not in b:
- #print(f"\t\t\t Added because x not in b and y not in b")
- new_symm|= {(x,y)}
- continue
- # Out of order in a
- fail = False
- for i in range(len(a)):
- if a[i] != x:continue
- for j in range(i):
- if a[j] == y:fail = True;break#print(f"\t\t\t Failed because a[{j}]=={chr(y)} and a[{i}]=={chr(x)}");break
- if fail:break
- if fail:continue
- # If x in a, does something imply b appears not after y?
- if x in a:
- for ch in b:
- if ch == x or (ch == y and chrs_dict[y] == 2) or (ch, y) in symm:break
- else:
- #print(f"\t\t\t Failed because nothing implies b appears not after {chr(y)}")
- fail = True
- if fail:continue
- # If y in a, does something imply b appears not before x?
- if y in a:
- for ch in b:
- if ch == y or (ch == x and chrs_dict[x] == 2) or (x, ch) in symm:break
- else:
- #print(f"\t\t\t Failed because nothing implies b appears not before {chr(x)}")
- fail = True
- #print(f"\t\t\t Conditions passed so adding {chr(x)}{chr(y)}")
- new_symm|= {(x,y)}
- # Add single char symmetries caused by new additions
- for ch0 in a:
- if ch0 in chrs_dict:continue
- for ch1 in chrs_dict:
- if ch1 in a:continue
- if ch1 not in ret_dict:continue
- for chb in b:
- if (ch1, chb) in symm:
- #print(f"\t\t\t Adding single char symmetry {chr(ch1)}{chr(ch0)} because {chr(ch1)}{chr(chb)}")
- new_symm|= {(ch1,ch0)}
- if (chb, ch1) in symm:
- #print(f"\t\t\t Adding single char symmetry {chr(ch0)}{chr(ch1)} because {chr(chb)}{chr(ch1)}")
- new_symm|= {(ch0, ch1)}
- # Add symmetries in a that are preserved
- for x,y in a_symm:
- add = None
- if x not in chrs_dict or y not in chrs_dict:
- add = {(x,y)}
- else:
- continue
- if x in chrs_dict:
- for z,_x in new_symm:
- if _x == x:add|= {(z,y)}
- if y in chrs_dict:
- for _y, z in new_symm:
- if _y == y:add|= {(x,z)}
- new_symm|= add
- return frozenset({(k, ret_dict[k]) for k in ret_dict}), new_symm
- def xprint(k,v):
- out = "("
- for ch,amt in k:
- if amt < 2 or amt > 7:print("amt",amt);exit(-1)
- amtdsc = ["??", "0?", "1", "0 or 1", "2 or more", "not 1", "1 or more", "any amount"][amt]
- out+= f"{chr(ch)} -> {amtdsc}, "
- out+= ") => ["
- for a,b in v:
- out+= f" {chr(a)}{chr(b)} "
- out+= "]"
- print(out)
- code =\
- b"""
- yes
- =|
- =!
- a|b=|>
- >b=b>
- >!c=!
- |!=yes
- """[1:-1]
- res = compile_f(code)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement