Advertisement
Guest User

osht - v. 0.0.2

a guest
Feb 16th, 2022
177
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.01 KB | None | 0 0
  1. import re, sys
  2. re1 = re.compile(rb"(([^=]|\\=)*)(=([^=]|\\=)*)?")
  3. import pprint
  4.  
  5. def unescape(i):
  6. ret = b""
  7. esc = 0
  8. for c in i:
  9. if esc and c == b"\\":
  10. ret+= b"\\"
  11. esc = 0
  12. elif esc and c == b"=":
  13. ret+= b"="
  14. esc = 0
  15. elif esc and c == b"n":
  16. ret+= b"\n"
  17. esc = 0
  18. elif esc:
  19. raise ValueError(f"Parser error: got character {c} after \\")
  20. elif c == b"\\":
  21. esc = 1
  22. else:
  23. ret+= bytes([c])
  24. if esc:
  25. raise ValueError(f"Parser error: got EOL/EOF after \\")
  26. return ret
  27.  
  28.  
  29. def parse(i):
  30. terminators = set()
  31. rules = set()
  32. lines = i.split(b"\n")
  33. for l in lines:
  34. m = re1.match(l)
  35. if m.group(3):
  36. rules|= {(unescape(m.group(1)), unescape(m.group(3)[1:]))}
  37. else:
  38. terminators|= {unescape(m.group(1))}
  39. return terminators, rules
  40.  
  41. def compile_f(c):
  42. pp = pprint.PrettyPrinter(indent=4)
  43. # Group based on the cardinality of elements in a multiset
  44. # Track information about order, but do not use it for grouping
  45. terms, rules = parse(c)
  46. states = {}
  47. for t in terms:
  48. k,s = lit_symm(t)
  49. if k in states:
  50. states[k] = merge_symm(s, states[k])
  51. else:
  52. states[k] = s
  53. while 1:
  54. #print("len states:", len(states))
  55. #pp.pprint(states)
  56. for k in states:
  57. v = states[k]
  58. #xprint(k,v)
  59. new_states = {}
  60. for chars in states:
  61. for a,b in rules:
  62. if charcounts_comp(b, chars):
  63. k,v = trans_symm(chars, states[chars], a, b)
  64. #print("\tRule",a,"->",b," got k:",k,", v:",v)
  65. if k in new_states:
  66. new_states[k] = merge_symm(new_states[k], v)
  67. elif k in states:
  68. new_states[k] = merge_symm(states[k], v)
  69. else:
  70. new_states[k] = v
  71. ns = states | new_states
  72. if states == ns:break
  73. states = ns
  74. #print("EWAFDSDFG")
  75. #print(states)
  76. # Generate code
  77. prog = """
  78. #include <stdlib.h>
  79. #include <stdio.h>
  80. #include <string.h>
  81.  
  82. struct WordNode {
  83. unsigned char* ptr;
  84. size_t len;
  85. struct WordNode* next;
  86. };
  87. """
  88. statelist = [(k, states[k])for k in states]
  89. # Generate input-match-and-verify for every state
  90. for idx,kv in enumerate(statelist):
  91. chrs,symm = kv
  92. symmc = {*symm}
  93. ordchrs = {e for t in symm for e in t}
  94. for a in ordchrs:
  95. for b in ordchrs:
  96. for c in ordchrs:
  97. if (a,c) in symmc and (a,b) in symm and (b,c) in symm:
  98. symmc-= {(a,c)}
  99. prog+= f"""
  100. int match_{idx}(unsigned char* ptr, size_t len) {{
  101. size_t count_dummy = 0;
  102. {"".join([f"size_t count_{ch} = 0;" for ch,_ in chrs])}
  103. for (size_t i = 0; i < len;++i) {{
  104. {"".join([f"if (ptr[i] == {ch}) {{if (count_{next((y for x,y in symmc if x == ch), 'dummy')})return 0;if (count_{ch} == 0) {{{''.join([f'count_{sm}+= count_{sm} ? 0 : 1;' for sm in ordchrs if (sm,ch) in symm])}count_{ch} = 1;}}}}" for ch in ordchrs])}
  105. {"else ".join(["if(0)count_dummy=0;"]+[f"if (ptr[i] == {ch})count_{ch}+= 1;" for ch,_ in chrs])}else return 0;
  106. }}
  107. {"".join([f"count_{ch}-=count_{ch}?1:0;" for ch in ordchrs])}
  108. {"".join([f"if (count_{ch}{['?','?','!=1','>1','<2','==1','<1','<0'][amt]})return 0;" for ch,amt in chrs])}
  109. return 1;
  110. }}
  111. """
  112. prog+= f"""
  113. int match(unsigned char* ptr, size_t len) {{
  114. {"".join([f"if (match_{idx}(ptr, len))return 1;" for idx in range(len(statelist))])}
  115. return 0;
  116. }}
  117.  
  118. int terminator(unsigned char* ptr, size_t len) {{
  119. {"".join([f"unsigned char arr_{idx}[{len(t)}]={{{str([*t])[1:-1]}}};if(len == {len(t)} && 0==memcmp(arr_{idx},ptr,len))return 1;" for idx,t in enumerate(terms)])}
  120. return 0;
  121. }}
  122.  
  123. void infloop(){{
  124. while (1){{}}
  125. }}
  126.  
  127. void mainloop(unsigned char* input, size_t len) {{
  128. if (!match(input, len)){{free(input);infloop();}};
  129. if (terminator(input, len)){{
  130. fwrite(input, 1, len, stdout);
  131. exit(0);
  132. }}
  133. struct WordNode startval = {{
  134. input,
  135. len,
  136. NULL,
  137. }};
  138. struct WordNode* start = malloc(sizeof(struct WordNode));
  139. if (start == NULL)exit(-1);
  140. *start = startval;
  141. struct WordNode* end = start;
  142. while (1) {{
  143. if (start == NULL)continue;
  144. """
  145. for a,b in rules:
  146. prog+= f"""
  147. if (start->len + {len(b)} >= {len(a)}) {{
  148. unsigned char a[{len(a)}] = {{{str([*a])[1:-1]}}};
  149. unsigned char b[{len(b)}] = {{{str([*b])[1:-1]}}};
  150. size_t totlen = start->len + {len(b)} - {len(a)};
  151. unsigned char arr[totlen ? totlen : 1];
  152. for (size_t i = 0; i + {len(a)} < len + 1;++i) {{
  153. size_t j = 0;
  154. for (;j < {len(a)}; ++j) {{
  155. if (a[j] != start->ptr[i+j])break;
  156. }}
  157. if (j == {len(a)}) {{
  158. for(size_t k = 0;k < {len(b)};k++) {{
  159. arr[i+k] = b[k];
  160. }}
  161. for(size_t p = i + {len(a)};p < start->len;p++) {{
  162. arr[p+{len(b)}-{len(a)}] = start->ptr[p];
  163. }}
  164. if (match(arr, totlen)) {{
  165. if (terminator(arr, totlen)) {{
  166. fwrite(arr, 1, totlen, stdout);
  167. exit(0);
  168. }} else {{
  169. unsigned char* copy = malloc(totlen ? totlen : 1);
  170. if (copy == NULL)exit(-1);
  171. memcpy(copy, arr, totlen);
  172. struct WordNode* next = malloc(sizeof(struct WordNode));
  173. if (next == NULL)exit(-1);
  174. struct WordNode nextdata = {{
  175. copy,
  176. totlen,
  177. NULL,
  178. }};
  179. *next = nextdata;
  180. end->next = next;
  181. end = next;
  182. }}
  183. }}
  184. }}
  185. arr[i] = start->ptr[i];
  186. }}
  187. }}
  188. """
  189. prog+= """
  190. struct WordNode* tmp = start;
  191. start = start->next;
  192. free(tmp->ptr);
  193. free(tmp);
  194. }
  195. }
  196.  
  197. // Read stdin and write size to i
  198. char* getStdin(size_t* i)
  199. {
  200. int c;
  201. *i = 0;
  202. char* ptrBuff = malloc(1);
  203.  
  204. while ((c = getchar()) && c != EOF)
  205. {
  206. if ((ptrBuff = (char*)realloc(ptrBuff, sizeof (char)+*i)) != NULL)
  207. ptrBuff[(*i)++] = c;
  208. else
  209. {
  210. free(ptrBuff);
  211. return NULL;
  212. }
  213. }
  214.  
  215. if (ptrBuff != NULL)
  216. ptrBuff[*i] = '\0';
  217.  
  218. return ptrBuff;
  219. }
  220.  
  221. int main() {
  222. size_t len;
  223. char* ptr = getStdin(&len);
  224. mainloop(ptr, len);
  225. return -2;
  226. }
  227. """
  228. with open("osht.c", "w") as f:
  229. f.write(prog)
  230.  
  231.  
  232. def charcounts(lit):
  233. counts = {}
  234. for ch in lit:
  235. counts[ch] = ch in counts and counts[ch] + 1 or 1
  236. return frozenset((ch,2**min(counts[ch],2))for ch in lit)
  237.  
  238. def lit_symm(lit):
  239. s = {(a,b) for a in lit for b in lit if a != b}
  240. for j in range(len(lit)):
  241. for i in range(j):
  242. b = lit[i]
  243. a = lit[j]
  244. s-= {(a,b)}
  245. return charcounts(lit), s
  246.  
  247. def charcounts_comp(substring, counts):
  248. subc = charcounts(substring)
  249. lut = {k:v for k,v in counts}
  250. return next((0 for ch,amt in subc if ch not in lut or lut[ch]<amt),1)
  251.  
  252.  
  253. def merge_symm(a,b):
  254. return a&b
  255.  
  256. def trans_symm(chrs, symm, a, b):
  257. counts = {}
  258. for ch in a:
  259. counts[ch] = counts[ch] + 1 if ch in counts else 1
  260. for ch in b:
  261. counts[ch] = counts[ch] - 1 if ch in counts else -1
  262. chrs_dict = {k:v for k,v in chrs}
  263. a_set,a_symm = lit_symm(a)
  264. a_dict = {k:v for k,v in a_set}
  265. tot = a_dict | chrs_dict
  266. ret_dict = {}
  267. #print(f"\t\tDbg: tot:{tot}, b_dict:{a_dict}")
  268. for ch in tot:
  269. amt = ch in chrs_dict and chrs_dict[ch] or 1
  270. diff = ch in counts and counts[ch]
  271. #print(f"\t\t\tTrying {chr(ch)}, amt: {amt}, diff: {diff}")
  272. ret = 0
  273. if amt & 4:
  274. ret|= (diff < -1) | 2*(diff < 0) | 4
  275. if diff == 1:
  276. ret|= amt << 1
  277. elif diff > 1:
  278. ret|= 4
  279. else:
  280. ret|= amt >> -diff
  281. ret = ret & 7
  282. if ret > 1:
  283. ret_dict[ch] = ret
  284. # Calculate the symms
  285. new_symm = set()
  286. for x,y in symm:
  287. #print(f"\t\t\tTrying {chr(x)}{chr(y)}")
  288. # These symbols will not be part of the output, so skip them
  289. if x not in ret_dict or y not in ret_dict:
  290. #print(f"\t\t\t Skipped because {x not in ret_dict} or {y not in ret_dict}")
  291. continue
  292. # No way it affects the order
  293. if x not in b and y not in b:
  294. #print(f"\t\t\t Added because x not in b and y not in b")
  295. new_symm|= {(x,y)}
  296. continue
  297. # Out of order in a
  298. fail = False
  299. for i in range(len(a)):
  300. if a[i] != x:continue
  301. for j in range(i):
  302. if a[j] == y:fail = True;break#print(f"\t\t\t Failed because a[{j}]=={chr(y)} and a[{i}]=={chr(x)}");break
  303. if fail:break
  304. if fail:continue
  305. # If x in a, does something imply b appears not after y?
  306. if x in a:
  307. for ch in b:
  308. if ch == x or (ch == y and chrs_dict[y] == 2) or (ch, y) in symm:break
  309. else:
  310. #print(f"\t\t\t Failed because nothing implies b appears not after {chr(y)}")
  311. fail = True
  312. if fail:continue
  313. # If y in a, does something imply b appears not before x?
  314. if y in a:
  315. for ch in b:
  316. if ch == y or (ch == x and chrs_dict[x] == 2) or (x, ch) in symm:break
  317. else:
  318. #print(f"\t\t\t Failed because nothing implies b appears not before {chr(x)}")
  319. fail = True
  320. #print(f"\t\t\t Conditions passed so adding {chr(x)}{chr(y)}")
  321. new_symm|= {(x,y)}
  322. # Add single char symmetries caused by new additions
  323. for ch0 in a:
  324. if ch0 in chrs_dict:continue
  325. for ch1 in chrs_dict:
  326. if ch1 in a:continue
  327. if ch1 not in ret_dict:continue
  328. for chb in b:
  329. if (ch1, chb) in symm:
  330. #print(f"\t\t\t Adding single char symmetry {chr(ch1)}{chr(ch0)} because {chr(ch1)}{chr(chb)}")
  331. new_symm|= {(ch1,ch0)}
  332. if (chb, ch1) in symm:
  333. #print(f"\t\t\t Adding single char symmetry {chr(ch0)}{chr(ch1)} because {chr(chb)}{chr(ch1)}")
  334. new_symm|= {(ch0, ch1)}
  335.  
  336.  
  337. # Add symmetries in a that are preserved
  338. for x,y in a_symm:
  339. add = None
  340. if x not in chrs_dict or y not in chrs_dict:
  341. add = {(x,y)}
  342. else:
  343. continue
  344. if x in chrs_dict:
  345. for z,_x in new_symm:
  346. if _x == x:add|= {(z,y)}
  347. if y in chrs_dict:
  348. for _y, z in new_symm:
  349. if _y == y:add|= {(x,z)}
  350. new_symm|= add
  351.  
  352. return frozenset({(k, ret_dict[k]) for k in ret_dict}), new_symm
  353.  
  354. def xprint(k,v):
  355. out = "("
  356. for ch,amt in k:
  357. if amt < 2 or amt > 7:print("amt",amt);exit(-1)
  358. amtdsc = ["??", "0?", "1", "0 or 1", "2 or more", "not 1", "1 or more", "any amount"][amt]
  359. out+= f"{chr(ch)} -> {amtdsc}, "
  360. out+= ") => ["
  361. for a,b in v:
  362. out+= f" {chr(a)}{chr(b)} "
  363. out+= "]"
  364. print(out)
  365.  
  366. code =\
  367. b"""
  368. yes
  369. =|
  370. =!
  371. a|b=|>
  372. >b=b>
  373. >!c=!
  374. |!=yes
  375. """[1:-1]
  376.  
  377. res = compile_f(code)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement