Advertisement
Guest User

Untitled

a guest
May 29th, 2017
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.59 KB | None | 0 0
  1. // Parses a dies irae "script" extracted from memory and dumps the content text
  2.  
  3. #include <stdint.h>
  4. #include <stdio.h>
  5. #include <stdlib.h>
  6.  
  7. #define and &&
  8. #define or ||
  9.  
  10. #ifndef NULL
  11. #define NULL 0
  12. #endif
  13.  
  14. int fgetc_or_die(FILE * a)
  15. {
  16. int got = fgetc(a);
  17. if(feof(a) or ferror(a) or got < 0x00 or got > 0xFF) exit(0);
  18. return got;
  19. }
  20.  
  21. FILE * script;
  22. FILE * text;
  23. uint32_t data;
  24.  
  25. void push(uint32_t codepoint)
  26. {
  27. if(codepoint == 0xFFFFFFFF) return;
  28.  
  29. if(codepoint < 0x20 and codepoint != 0x0D and codepoint != 0x0A)
  30. fprintf(stderr, "Tried to output a control character %08X\n", (unsigned int)ftell(script)), exit(0);
  31. if(codepoint < 0x80)
  32. {
  33. putc(codepoint&0xFF, text);
  34. }
  35. else if(codepoint < 0x800)
  36. {
  37. putc(((codepoint&(0x1F<<6))>>6)|0xC0,text);
  38. putc(((codepoint&(0x3F<<0))>>0)|0x80,text);
  39. }
  40. else if(codepoint < 0x10000)
  41. {
  42. putc(((codepoint&(0x0F<<12))>>12)|0xE0,text);
  43. putc(((codepoint&(0x3F<< 6))>> 6)|0x80,text);
  44. putc(((codepoint&(0x3F<< 0))>> 0)|0x80,text);
  45. }
  46. else if(codepoint < 0x110000)
  47. {
  48. putc(((codepoint&(0x07<<18))>>18)|0xF0,text);
  49. putc(((codepoint&(0x3F<<12))>>12)|0x80,text);
  50. putc(((codepoint&(0x3F<< 6))>> 6)|0x80,text);
  51. putc(((codepoint&(0x3F<< 0))>> 0)|0x80,text);
  52. }
  53. }
  54.  
  55. uint32_t get_codepoint(FILE * file)
  56. {
  57. data = 0;
  58. #define CONT()\
  59. {\
  60. value <<= 6;\
  61. int byte = fgetc_or_die(file);\
  62. if(byte < 0x80 or byte >= 0xC0)\
  63. printf("Invalid continuation byte at %08X in input\n", (unsigned int)ftell(file)-1), fflush(stderr), exit(0);\
  64. value |= byte&0x3F;\
  65. }
  66. uint32_t value = 0;
  67. int init = fgetc_or_die(file);
  68.  
  69. if(init == 0x06)
  70. // unknown but is a prefix to another codepoint
  71. // FIXME: Values other than 0 (which is a pagefeed) should be encoded properly!
  72. {
  73. value = get_codepoint(file);
  74. if(value == 0)
  75. {
  76. push(0x0A);
  77. push(0x0A);
  78. return 0xFFFFFFFF;
  79. }
  80. else
  81. return 0xFFFFFFFF;
  82. }
  83.  
  84. if(init < 0 or init > 0xFF)
  85. puts("File over"), fflush(stderr), exit(0);
  86. else if(init < 0x80)
  87. {
  88. value = init;
  89. }
  90. else if(init < 0xC0 or init == 0xC0 or init == 0xC1 or init == 0xF5 or init == 0xF6 or init == 0xF7)
  91. fprintf(stderr, "Invalid initial byte %02X at %08X in input\n", init, (unsigned int)ftell(file)-1), fflush(stderr), exit(0);
  92. else if(init < 0xE0)
  93. {
  94. value |= init&0x1F;
  95. if(value == 0)
  96. fprintf(stderr, "Overlong encoding near %08X in input\n", (unsigned int)ftell(file)-1), fflush(stderr), exit(0);
  97. CONT()
  98. }
  99. else if(init < 0xF0)
  100. {
  101. value |= init&0x0F;
  102. if(value == 0)
  103. fprintf(stderr, "Overlong encoding near %08X in input\n", (unsigned int)ftell(file)-1), fflush(stderr), exit(0);
  104. CONT()
  105. CONT()
  106. }
  107. else
  108. {
  109. value |= init&0x07;
  110. if(value == 0)
  111. fprintf(stderr, "Overlong encoding near %08X in input\n", (unsigned int)ftell(file)-1), fflush(stderr), exit(0);
  112. CONT()
  113. CONT()
  114. CONT()
  115. }
  116. if(value > 0x10FFFF or (value > 0xD800 and value <= 0xDFFF))
  117. fprintf(stderr, "Invalid codepoint constrcted near %08X in input\n", (unsigned int)ftell(file)-1), fflush(stderr), exit(0);
  118. return value;
  119. }
  120.  
  121. void pull()
  122. {
  123. data = get_codepoint(script);
  124. }
  125.  
  126. int main(int argc, char ** argv)
  127. {
  128. if(argc < 2) return puts("no input given"), 0;
  129.  
  130. script = fopen(argv[1], "rb");
  131. if(!script) return puts("failed to open input"), 0;
  132.  
  133. text = stdout;
  134.  
  135. while(1)
  136. {
  137. pull();
  138. if(data == 0x01) // furigana
  139. {
  140. push(0x3008);
  141. while(1)
  142. {
  143. pull();
  144. //if(data == 0x0A) break;
  145. if(data >= 0x20)
  146. push(data);
  147. else break;
  148. }
  149. push(0x3009);
  150. push(0x300A);
  151. while(1)
  152. {
  153. pull();
  154. if(data >= 0x20 or data == 0x0A)
  155. push(data);
  156. else break;
  157. }
  158. push(0x300B);
  159. }
  160. else if(data == 0x08) // audio filename is not content text, skip
  161. {
  162. while(1)
  163. {
  164. pull();
  165. if(data == 0x00) break;
  166. }
  167. }
  168. else if(data < 0x20)
  169. {
  170. if(data == 0x0A or data == 0x0D)
  171. {
  172. push(data);
  173. }
  174. }
  175. else
  176. {
  177. push(data);
  178. }
  179. }
  180. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement