Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Script to parse XML chapter data from The Letter
- # to check for improperly used italics tags
- #
- # Written by HenryEx
- #
- # script for QuickBMS http://quickbms.aluigi.org
- # ----------------------------------------
- # Global setup: remember the input file's base name and size, start with
- # empty chapter fields, and define the line-ending constants.
- get FILENAME basename 0
- get FILESIZE asize 0
- set CHAPTER string ""
- set CHNAME string ""
- set CR binary "\x0D" # Carriage Return
- set LF binary "\x0A" # Line Feed
- set CRLF short 2573 # 2573 = 0x0A0D; written as a short this is meant to give CR+LF
- # The first line of the input has to be the XML declaration; anything
- # else is treated as "not one of our files" and ends the script.
- # The inner quotes are escaped like in every other quoted literal of
- # this script (e.g. "id=\""), so the literal cannot end early.
- get XMLHEADER line 0
- if XMLHEADER != "<?xml version=\"1.0\"?>"
-     print "[!] Error: incorrect XML header found! Exiting..."
-     CleanExit
- endif
- # Scratch buffer: MEMORY_FILE2 is set up here as an empty, pre-sized
- # memory file.
- math TMP = 0x8000
- math TMP * 1000 # 32,768,000 bytes, roughly 32 MB
- log MEMORY_FILE2 0 0
- putvarchr MEMORY_FILE2 TMP 0 # touching the far end pre-allocates the buffer, which speeds up later writes
- log MEMORY_FILE2 0 0 # back to an empty file at position 0
- # offset of the "<" that opens the <items> tag
- findloc STARTITEMS string "<items>" 0
- # ========================================
- # Header pass: read the file line by line until the <items> tag is
- # reached, picking up the <Chapter> and <ChapterName> values on the way.
- # Leaves CHAPTER and CHNAME filled in (when present) and OFFSET equal to
- # STARTITEMS.
- for i = 0 < 1
-     get DATA line 0
-     savepos OFFSET 0 # file position right after the line just read
-     if DATA != "" # blank lines carry no header item
-         if DATA & "</Chapter>"
-             string DATA > "</Chapter>" # clip off end tag
-             string DATA | "<Chapter>" # clip off start tag
-             set CHAPTER string DATA
-         elif DATA & "</ChapterName>"
-             string DATA > "</ChapterName>" # clip off end tag
-             string DATA | "<ChapterName>" # clip off start tag
-             set CHNAME string DATA
-         endif
-     endif
-     if OFFSET >= STARTITEMS # reached or passed <items>: that was the last header line
-         math OFFSET = STARTITEMS
-         math i = 1 # leave the loop
-     endif
- next
- # ========================================
- # Main pass: visit every <VNItem ...>...</VNItem> element after <items>.
- # For each dialogue item (action id 1) the [i] and [/i] tags in its
- # vntext are counted; when the two counts differ, a report line
- # "NNNN: Tag error value <diff> in text "<text>"" is appended to
- # MEMORY_FILE2, terminated by CRLF.
- #
- # go to offset after <items> (7 = length of "<items>")
- math OFFSET + 7
- goto OFFSET 0
- # process VNItems; the loop keeps going until i is set to 1
- for i = 0 < 1
-     # set up default field values
-     math ACTIONID = 0
-     string INDEX = "?"
-     string VNTEXT = ""
-     set TSTACK long 0 # number of [i] tags minus number of [/i] tags
-     # fetch the whole item, which may span several lines
-     findloc TAG_OFF string "<VNItem" 0 ""
-     if TAG_OFF != ""
-         findloc TAG_END string "</VNItem>"
-         xmath TAG_SZ "TAG_END + 9 - TAG_OFF" # 9 = length of "</VNItem>"
-         goto TAG_OFF 0
-         getdstring DATA TAG_SZ 0
-     else
-         math i = 1 # no more items found, stop
-         continue # end for loop
-     endif
-     # get action id
-     string TEMP = DATA
-     string TEMP 0| "id=\""
-     string TEMP 0% "\""
-     if TEMP != ""
-         math ACTIONID = TEMP
-     endif
-     # abort if we got bogus data
-     if ACTIONID > 48
-         # look up the item's idx now, so the message names the item
-         # instead of always printing the "?" default
-         string TEMP = DATA
-         string TEMP 0| "idx=\""
-         string TEMP 0% "\""
-         if TEMP != ""
-             set INDEX string TEMP
-         endif
-         print "[!] Error: action ID %ACTIONID% on line %INDEX% out of bounds! Exiting..."
-         CleanExit
-     endif
-     # skip rest of loop if not a dialogue line
-     if ACTIONID != 1
-         continue
-     endif
-     # get index
-     string TEMP = DATA
-     string TEMP 0| "idx=\""
-     string TEMP 0% "\""
-     if TEMP != ""
-         math INDEX = TEMP
-     endif
-     if INDEX > 9999 # the report prefix is formatted with 4 digits
-         string LINETEXT = "[!]Error: Index of 10k and above unsupported!"
-         putct LINETEXT string -1 MEMORY_FILE2
-         math i = 1 # stop everything
-         continue # abort, abort
-     endif
-     # get VN text (still XML-escaped here, so the first quote ends the value)
-     string TEMP = DATA
-     string TEMP 0| "vntext=\""
-     string TEMP 0% "\""
-     if TEMP != ""
-         callfunction CLEAN_XML_STR 1
-         set VNTEXT string TEMP
-     endif
-     # start the report line with the 4 digit idx
-     string LINETEXT p "%.4i: " INDEX
-     # Count the tags on a copy that is padded with "~" on both sides.
-     # Both counting loops stop as soon as TEMP becomes empty, so without
-     # the padding an "[i]" at the very end of the text (or an "[/i]" at
-     # the very start) would leave TEMP empty and go uncounted.
-     set TAGSCAN string "~"
-     string TAGSCAN + VNTEXT
-     string TAGSCAN + "~"
-     set TEMP string TAGSCAN
-     for j = 0 < 1 # count [i] tags
-         string TEMP 0| "[i]" # keep what follows the tag
-         if TEMP != ""
-             math TSTACK + 1
-         else
-             math j = 1 # TEMP is empty: nothing left to count
-         endif
-     next
-     set TEMP string TAGSCAN
-     for j = 0 < 1 # count [/i] tags
-         string TEMP 0> "[/i]" # keep what precedes the tag
-         if TEMP != ""
-             math TSTACK - 1
-         else
-             math j = 1 # TEMP is empty: nothing left to count
-         endif
-     next
-     if TSTACK != 0 # number of [i] and [/i] tags don't match up
-         string LINETEXT + "Tag error value "
-         string LINETEXT + TSTACK
-         string LINETEXT + " in text \""
-         string LINETEXT + VNTEXT
-         string LINETEXT + "\""
-         putct LINETEXT string -1 MEMORY_FILE2
-         put CRLF short MEMORY_FILE2
-     endif
- next
- # ========================================
- # Write the collected report to "<CHAPTER>. <CHNAME>.txt", but only when
- # something was actually collected, then end the script.
- get MLENGTH asize MEMORY_FILE2
- if MLENGTH > 0
-     set FILENAME string CHAPTER
-     string FILENAME + ". "
-     string FILENAME + CHNAME
-     string FILENAME + ".txt"
-     log FILENAME 0 MLENGTH MEMORY_FILE2
- endif
- CleanExit
- # function to replace XML escape sequences in the global TEMP string
- #
- # In:  TEMP - raw attribute value as cut out of the XML
- # Out: TEMP - same text with CR / LF removed and the five predefined
- #             XML entities replaced by the characters they stand for
- # Reads the global CR and LF constants.
- startfunction CLEAN_XML_STR
-     string TEMP - CR # remove carriage returns
-     string TEMP - LF # remove line feeds
-     string TEMP replace "&lt;" "<"
-     string TEMP replace "&gt;" ">"
-     string TEMP replace "&quot;" "\""
-     string TEMP replace "&apos;" "'"
-     # &amp; goes last, so that text such as "&amp;lt;" ends up as the
-     # literal "&lt;" instead of being decoded a second time
-     string TEMP replace "&amp;" "&"
- endfunction
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement