Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Parses a dies irae "script" extracted from memory and dumps the content text
- #include <stdint.h>
- #include <stdio.h>
- #include <stdlib.h>
- #define and &&
- #define or ||
- #ifndef NULL
- #define NULL 0
- #endif
// Reads one byte from `a` and returns it as a value in 0x00..0xFF.
//
// Never returns on failure. End of input is how the program normally
// finishes (main() loops forever), so that case exits with status 0.
// A genuine read error prints a diagnostic and exits with EXIT_FAILURE
// instead of being reported as success.
int fgetc_or_die(FILE * a)
{
    int got = fgetc(a);
    if (got == EOF)
    {
        if (ferror(a))
        {
            fprintf(stderr, "read error on input\n");
            exit(EXIT_FAILURE);
        }
        // Plain end of file: exit() flushes the buffered output streams.
        exit(0);
    }
    return got;
}
// Input stream the script bytes are read from; main() opens it.
FILE *script = NULL;
// Output stream the UTF-8 text is written to; main() points it at stdout.
FILE *text = NULL;
// Most recently decoded code point; pull() stores it, get_codepoint() resets it.
uint32_t data = 0;
- void push(uint32_t codepoint)
- {
- if(codepoint == 0xFFFFFFFF) return;
- if(codepoint < 0x20 and codepoint != 0x0D and codepoint != 0x0A)
- fprintf(stderr, "Tried to output a control character %08X\n", (unsigned int)ftell(script)), exit(0);
- if(codepoint < 0x80)
- {
- putc(codepoint&0xFF, text);
- }
- else if(codepoint < 0x800)
- {
- putc(((codepoint&(0x1F<<6))>>6)|0xC0,text);
- putc(((codepoint&(0x3F<<0))>>0)|0x80,text);
- }
- else if(codepoint < 0x10000)
- {
- putc(((codepoint&(0x0F<<12))>>12)|0xE0,text);
- putc(((codepoint&(0x3F<< 6))>> 6)|0x80,text);
- putc(((codepoint&(0x3F<< 0))>> 0)|0x80,text);
- }
- else if(codepoint < 0x110000)
- {
- putc(((codepoint&(0x07<<18))>>18)|0xF0,text);
- putc(((codepoint&(0x3F<<12))>>12)|0x80,text);
- putc(((codepoint&(0x3F<< 6))>> 6)|0x80,text);
- putc(((codepoint&(0x3F<< 0))>> 0)|0x80,text);
- }
- }
- uint32_t get_codepoint(FILE * file)
- {
- data = 0;
- #define CONT()\
- {\
- value <<= 6;\
- int byte = fgetc_or_die(file);\
- if(byte < 0x80 or byte >= 0xC0)\
- printf("Invalid continuation byte at %08X in input\n", (unsigned int)ftell(file)-1), fflush(stderr), exit(0);\
- value |= byte&0x3F;\
- }
- uint32_t value = 0;
- int init = fgetc_or_die(file);
- if(init == 0x06)
- // unknown but is a prefix to another codepoint
- // FIXME: Values other than 0 (which is a pagefeed) should be encoded properly!
- {
- value = get_codepoint(file);
- if(value == 0)
- {
- push(0x0A);
- push(0x0A);
- return 0xFFFFFFFF;
- }
- else
- return 0xFFFFFFFF;
- }
- if(init < 0 or init > 0xFF)
- puts("File over"), fflush(stderr), exit(0);
- else if(init < 0x80)
- {
- value = init;
- }
- else if(init < 0xC0 or init == 0xC0 or init == 0xC1 or init == 0xF5 or init == 0xF6 or init == 0xF7)
- fprintf(stderr, "Invalid initial byte %02X at %08X in input\n", init, (unsigned int)ftell(file)-1), fflush(stderr), exit(0);
- else if(init < 0xE0)
- {
- value |= init&0x1F;
- if(value == 0)
- fprintf(stderr, "Overlong encoding near %08X in input\n", (unsigned int)ftell(file)-1), fflush(stderr), exit(0);
- CONT()
- }
- else if(init < 0xF0)
- {
- value |= init&0x0F;
- if(value == 0)
- fprintf(stderr, "Overlong encoding near %08X in input\n", (unsigned int)ftell(file)-1), fflush(stderr), exit(0);
- CONT()
- CONT()
- }
- else
- {
- value |= init&0x07;
- if(value == 0)
- fprintf(stderr, "Overlong encoding near %08X in input\n", (unsigned int)ftell(file)-1), fflush(stderr), exit(0);
- CONT()
- CONT()
- CONT()
- }
- if(value > 0x10FFFF or (value > 0xD800 and value <= 0xDFFF))
- fprintf(stderr, "Invalid codepoint constrcted near %08X in input\n", (unsigned int)ftell(file)-1), fflush(stderr), exit(0);
- return value;
- }
- void pull()
- {
- data = get_codepoint(script);
- }
- int main(int argc, char ** argv)
- {
- if(argc < 2) return puts("no input given"), 0;
- script = fopen(argv[1], "rb");
- if(!script) return puts("failed to open input"), 0;
- text = stdout;
- while(1)
- {
- pull();
- if(data == 0x01) // furigana
- {
- push(0x3008);
- while(1)
- {
- pull();
- //if(data == 0x0A) break;
- if(data >= 0x20)
- push(data);
- else break;
- }
- push(0x3009);
- push(0x300A);
- while(1)
- {
- pull();
- if(data >= 0x20 or data == 0x0A)
- push(data);
- else break;
- }
- push(0x300B);
- }
- else if(data == 0x08) // audio filename is not content text, skip
- {
- while(1)
- {
- pull();
- if(data == 0x00) break;
- }
- }
- else if(data < 0x20)
- {
- if(data == 0x0A or data == 0x0D)
- {
- push(data);
- }
- }
- else
- {
- push(data);
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement