Advertisement
Guest User

Untitled

a guest
Feb 17th, 2013
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.93 KB | None | 0 0
  1. struct Node : Markup::Node {
  2. protected:
  3. inline string escape() const {
  4. string result = data;
  5. result.replace("&", "&");
  6. result.replace("<", "&lt;");
  7. result.replace(">", "&gt;");
  8. if(attribute == false) return result;
  9. result.replace("\'", "&apos;");
  10. result.replace("\"", "&quot;");
  11. return result;
  12. }
  13.  
  14. inline bool isName(char c) const {
  15. if(c >= 'A' && c <= 'Z') return true;
  16. if(c >= 'a' && c <= 'z') return true;
  17. if(c >= '0' && c <= '9') return true;
  18. if(c == '.' || c == '_') return true;
  19. if(c == '?') return true;
  20. return false;
  21. }
  22.  
  23. inline bool isWhitespace(char c) const {
  24. if(c == ' ' || c == '\t') return true;
  25. if(c == '\r' || c == '\n') return true;
  26. return false;
  27. }
  28.  
  29. //copy part of string from source document into target string; decode markup while copying
  30. inline void copy(string &target, const char *source, unsigned length) {
  31. target.reserve(length + 1);
  32.  
  33. #if defined(NALL_XML_LITERAL)
  34. memcpy(target(), source, length);
  35. target[length] = 0;
  36. return;
  37. #endif
  38.  
  39. char *output = target();
  40. while(length) {
  41. if(*source == '&') {
  42. if(!memcmp(source, "&lt;", 4)) { *output++ = '<'; source += 4; length -= 4; continue; }
  43. if(!memcmp(source, "&gt;", 4)) { *output++ = '>'; source += 4; length -= 4; continue; }
  44. if(!memcmp(source, "&amp;", 5)) { *output++ = '&'; source += 5; length -= 5; continue; }
  45. if(!memcmp(source, "&apos;", 6)) { *output++ = '\''; source += 6; length -= 6; continue; }
  46. if(!memcmp(source, "&quot;", 6)) { *output++ = '\"'; source += 6; length -= 6; continue; }
  47. }
  48.  
  49. if(attribute == false && source[0] == '<' && source[1] == '!') {
  50. //comment
  51. if(!memcmp(source, "<!--", 4)) {
  52. source += 4, length -= 4;
  53. while(memcmp(source, "-->", 3)) source++, length--;
  54. source += 3, length -= 3;
  55. continue;
  56. }
  57.  
  58. //CDATA
  59. if(!memcmp(source, "<![CDATA[", 9)) {
  60. source += 9, length -= 9;
  61. while(memcmp(source, "]]>", 3)) *output++ = *source++, length--;
  62. source += 3, length -= 3;
  63. continue;
  64. }
  65. }
  66.  
  67. *output++ = *source++, length--;
  68. }
  69. *output = 0;
  70. }
  71.  
  72. inline bool parseExpression(const char *&p) {
  73. if(*(p + 1) != '!') return false;
  74.  
  75. //comment
  76. if(!memcmp(p, "<!--", 4)) {
  77. while(*p && memcmp(p, "-->", 3)) p++;
  78. if(!*p) throw "unclosed comment";
  79. p += 3;
  80. return true;
  81. }
  82.  
  83. //CDATA
  84. if(!memcmp(p, "<![CDATA[", 9)) {
  85. while(*p && memcmp(p, "]]>", 3)) p++;
  86. if(!*p) throw "unclosed CDATA";
  87. p += 3;
  88. return true;
  89. }
  90.  
  91. //DOCTYPE
  92. if(!memcmp(p, "<!DOCTYPE", 9)) {
  93. unsigned counter = 0;
  94. do {
  95. char n = *p++;
  96. if(!n) throw "unclosed DOCTYPE";
  97. if(n == '<') counter++;
  98. if(n == '>') counter--;
  99. } while(counter);
  100. return true;
  101. }
  102.  
  103. return false;
  104. }
  105.  
  106. //returns true if tag closes itself (<tag/>); false if not (<tag>)
  107. inline bool parseHead(const char *&p) {
  108. //parse name
  109. const char *nameStart = ++p; //skip '<'
  110. while(isName(*p)) p++;
  111. const char *nameEnd = p;
  112. copy(name, nameStart, nameEnd - nameStart);
  113. if(name.empty()) throw "missing element name";
  114.  
  115. //parse attributes
  116. while(*p) {
  117. while(isWhitespace(*p)) p++;
  118. if(!*p) throw "unclosed attribute";
  119. if(*p == '?' || *p == '/' || *p == '>') break;
  120.  
  121. //parse attribute name
  122. Node attribute;
  123. attribute.attribute = true;
  124.  
  125. const char *nameStart = p;
  126. while(isName(*p)) p++;
  127. const char *nameEnd = p;
  128. copy(attribute.name, nameStart, nameEnd - nameStart);
  129. if(attribute.name.empty()) throw "missing attribute name";
  130.  
  131. //parse attribute data
  132. if(*p++ != '=') throw "missing attribute value";
  133. char terminal = *p++;
  134. if(terminal != '\'' && terminal != '\"') throw "attribute value not quoted";
  135. const char *dataStart = p;
  136. while(*p && *p != terminal) p++;
  137. if(!*p) throw "missing attribute data terminal";
  138. const char *dataEnd = p++; //skip closing terminal
  139.  
  140. copy(attribute.data, dataStart, dataEnd - dataStart);
  141. children.append(attribute);
  142. }
  143.  
  144. //parse closure
  145. if(*p == '?' && *(p + 1) == '>') { p += 2; return true; }
  146. if(*p == '/' && *(p + 1) == '>') { p += 2; return true; }
  147. if(*p == '>') { p += 1; return false; }
  148. throw "invalid element tag";
  149. }
  150.  
  151. //parse element and all of its child elements
  152. inline void parseElement(const char *&p) {
  153. Node node;
  154. if(node.parseHead(p) == false) node.parse(p);
  155. children.append(node);
  156. }
  157.  
  158. //return true if </tag> matches this node's name
  159. inline bool parseClosureElement(const char *&p) {
  160. if(p[0] != '<' || p[1] != '/') return false;
  161. p += 2;
  162. const char *nameStart = p;
  163. while(*p && *p != '>') p++;
  164. if(*p != '>') throw "unclosed closure element";
  165. const char *nameEnd = p++;
  166. if(memcmp(name, nameStart, nameEnd - nameStart)) throw "closure element name mismatch";
  167. return true;
  168. }
  169.  
  170. //parse contents of an element
  171. inline void parse(const char *&p) {
  172. const char *dataStart = p, *dataEnd = p;
  173.  
  174. while(*p) {
  175. while(*p && *p != '<') p++;
  176. if(!*p) break;
  177. dataEnd = p;
  178. if(parseClosureElement(p) == true) break;
  179. if(parseExpression(p) == true) continue;
  180. parseElement(p);
  181. }
  182.  
  183. copy(data, dataStart, dataEnd - dataStart);
  184. }
  185. };
  186.  
  187. struct Document : Node {
  188. string error;
  189.  
  190. inline bool load(const char *document) {
  191. if(document == nullptr) return false;
  192. reset();
  193. try {
  194. parse(document);
  195. } catch(const char *error) {
  196. reset();
  197. this->error = error;
  198. return false;
  199. }
  200. return true;
  201. }
  202.  
  203. inline Document() {}
  204. inline Document(const char *document) { load(document); }
  205. };
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement