Advertisement
HenryEx

Parse italics tags in The Letter XML data

Nov 8th, 2017
183
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.61 KB | None | 0 0
  1. # Script to parse XML chapter data from The Letter
  2. # to check for improperly used italics tags
  3. #
  4. # Written by HenryEx
  5. #
  6. # script for QuickBMS http://quickbms.aluigi.org
  7.  
  8.  
  9. # set misc. standard strings / variables
  10. get FILENAME basename 0
  11. get FILESIZE asize 0
  12. set CHAPTER string ""
  13. set CHNAME string ""
  14. set CR binary "\x0D" # Carriage Return
  15. set LF binary "\x0A" # Line Feed
  16. set CRLF short 2573 # CR+LF
  17. get XMLHEADER line 0
  18. if XMLHEADER != "<?xml version="1.0"?>"
  19. print "[!] Error: incorrect XML header found! Exiting..."
  20. CleanExit
  21. endif
  22. findloc STARTITEMS string "<items>" 0 # offset of start bracket for items
  23.  
  24. # setup virtual memory file
  25. math TMP = 1000
  26. math TMP *= 0x8000 # 32 MB
  27.  
  28. log MEMORY_FILE2 0 0
  29. putvarchr MEMORY_FILE2 TMP 0 # improves the speed with pre-allocation
  30. log MEMORY_FILE2 0 0 # reset the position and size of the file
  31.  
  32.  
  33.  
  34. # ========================================
  35.  
  36. # process first lines
  37. for i = 0 < 1
  38. get DATA line 0
  39. savepos OFFSET 0 # offset of next line
  40.  
  41. if OFFSET >= STARTITEMS
  42. math OFFSET = STARTITEMS
  43. math i = 1 # stop reading header lines
  44. endif
  45.  
  46. if DATA != "" # parse other header items
  47. if DATA & "</Chapter>"
  48. string DATA > "</Chapter>" # clip off end tag
  49. string DATA | "<Chapter>" # clip off start tag
  50. set CHAPTER string DATA
  51. elif DATA & "</ChapterName>"
  52. string DATA > "</ChapterName>" # clip off end tag
  53. string DATA | "<ChapterName>" # clip off start tag
  54. set CHNAME string DATA
  55. endif
  56. endif
  57. next
  58.  
  59.  
  60. # ========================================
  61.  
  62. # go to offset after <items>
  63. math OFFSET + 7
  64. goto OFFSET 0
  65.  
  66. # process VNItems
  67. for i = 0 < 1
  68. # set up default field values
  69. math ACTIONID = 0
  70. string INDEX = "?"
  71. string VNTEXT = ""
  72. set TSTACK long 0 # tag stack
  73.  
  74. # item fetch across lines
  75. findloc TAG_OFF string "<VNItem" 0 ""
  76. if TAG_OFF != ""
  77. findloc TAG_END string "</VNItem>"
  78. xmath TAG_SZ "TAG_END + 9 - TAG_OFF"
  79. goto TAG_OFF 0
  80. getdstring DATA TAG_SZ 0
  81. else
  82. math i = 1 # no more lines found, stop
  83. continue # end for loop
  84. endif
  85.  
  86. # get action id
  87. string TEMP = DATA
  88. string TEMP 0| "id=\""
  89. string TEMP 0% "\""
  90. if TEMP != ""
  91. math ACTIONID = TEMP
  92. endif
  93.  
  94. # abort if we got bogus data
  95. if ACTIONID > 48
  96. print "[!] Error: action ID %ACTIONID% on line %INDEX% out of bounds! Exiting..."
  97. CleanExit
  98. endif
  99.  
  100. # skip rest of loop if not a dialogue line
  101. if ACTIONID != 1
  102. continue
  103. endif
  104.  
  105. # get index
  106. string TEMP = DATA
  107. string TEMP 0| "idx=\""
  108. string TEMP 0% "\""
  109. if TEMP != ""
  110. math INDEX = TEMP
  111. endif
  112.  
  113. if INDEX > 9999 # whoa what
  114. string LINETEXT = "[!]Error: Index of 10k and above unsupported!"
  115. putct LINETEXT string -1 MEMORY_FILE2
  116. math i = 1 # stop everything
  117. continue # abort, abort
  118. endif
  119.  
  120. # get VN text
  121. string TEMP = DATA
  122. string TEMP 0| "vntext=\""
  123. string TEMP 0% "\""
  124. if TEMP != ""
  125. callfunction CLEAN_XML_STR 1
  126. set VNTEXT string TEMP
  127. endif
  128.  
  129.  
  130. # parse values into text file
  131. string LINETEXT p "%.4i: " INDEX # get 4 digit idx
  132.  
  133. # print "Parse action 1: say @ idx %INDEX%"
  134.  
  135. set TEMP string VNTEXT
  136. for j = 0 < 1 # count [i] tags
  137. string TEMP 0| "[i]"
  138. if TEMP != ""
  139. math TSTACK + 1
  140. else
  141. math j = 1
  142. endif
  143. next
  144.  
  145. set TEMP string VNTEXT
  146. for j = 0 < 1 # count [/i] tags
  147. string TEMP 0> "[/i]"
  148. if TEMP != ""
  149. math TSTACK - 1
  150. else
  151. math j = 1
  152. endif
  153. next
  154.  
  155. if TSTACK != 0 # number of [i] and [/i] tags don't match up
  156. string LINETEXT + "Tag error value "
  157. string LINETEXT + TSTACK
  158. string LINETEXT + " in text \""
  159. string LINETEXT + VNTEXT
  160. string LINETEXT + "\""
  161. else
  162. string LINETEXT = ""
  163. endif
  164.  
  165. if LINETEXT != ""
  166. putct LINETEXT string -1 MEMORY_FILE2
  167. put CRLF short MEMORY_FILE2
  168. endif
  169.  
  170. next
  171.  
  172.  
  173. # ========================================
  174.  
  175. set FILENAME string CHAPTER
  176. string FILENAME + ". "
  177. string FILENAME + CHNAME
  178. string FILENAME + ".txt"
  179.  
  180. get MLENGTH asize MEMORY_FILE2
  181. if MLENGTH > 0
  182. log FILENAME 0 MLENGTH MEMORY_FILE2
  183. endif
  184.  
  185. CleanExit
  186.  
  187.  
  188. # function to replace XML escape characters in TEMP string
  189. startfunction CLEAN_XML_STR
  190. string TEMP - CR # remove carriage returns
  191. string TEMP - LF # remove line feeds
  192. string TEMP replace "&lt;" "<"
  193. string TEMP replace "&gt;" ">"
  194. string TEMP replace "&amp;" "&"
  195. string TEMP replace "&quot;" "\""
  196. string TEMP replace "&apos;" "'"
  197. endfunction
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement