Want more features on Pastebin? Sign Up, it's FREE!
Guest

Untitled

By: a guest on Feb 17th, 2013  |  syntax: None  |  size: 5.93 KB  |  views: 66  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. struct Node : Markup::Node {
  2. protected:
  3.   inline string escape() const {
  4.     string result = data;
  5.     result.replace("&", "&");
  6.     result.replace("<", "&lt;");
  7.     result.replace(">", "&gt;");
  8.     if(attribute == false) return result;
  9.     result.replace("\'", "&apos;");
  10.     result.replace("\"", "&quot;");
  11.     return result;
  12.   }
  13.  
  14.   inline bool isName(char c) const {
  15.     if(c >= 'A' && c <= 'Z') return true;
  16.     if(c >= 'a' && c <= 'z') return true;
  17.     if(c >= '0' && c <= '9') return true;
  18.     if(c == '.' || c == '_') return true;
  19.     if(c == '?') return true;
  20.     return false;
  21.   }
  22.  
  23.   inline bool isWhitespace(char c) const {
  24.     if(c ==  ' ' || c == '\t') return true;
  25.     if(c == '\r' || c == '\n') return true;
  26.     return false;
  27.   }
  28.  
  29.   //copy part of string from source document into target string; decode markup while copying
  30.   inline void copy(string &target, const char *source, unsigned length) {
  31.     target.reserve(length + 1);
  32.  
  33.     #if defined(NALL_XML_LITERAL)
  34.     memcpy(target(), source, length);
  35.     target[length] = 0;
  36.     return;
  37.     #endif
  38.  
  39.     char *output = target();
  40.     while(length) {
  41.       if(*source == '&') {
  42.         if(!memcmp(source, "&lt;",   4)) { *output++ = '<';  source += 4; length -= 4; continue; }
  43.         if(!memcmp(source, "&gt;",   4)) { *output++ = '>';  source += 4; length -= 4; continue; }
  44.         if(!memcmp(source, "&amp;",  5)) { *output++ = '&';  source += 5; length -= 5; continue; }
  45.         if(!memcmp(source, "&apos;", 6)) { *output++ = '\''; source += 6; length -= 6; continue; }
  46.         if(!memcmp(source, "&quot;", 6)) { *output++ = '\"'; source += 6; length -= 6; continue; }
  47.       }
  48.  
  49.       if(attribute == false && source[0] == '<' && source[1] == '!') {
  50.         //comment
  51.         if(!memcmp(source, "<!--", 4)) {
  52.           source += 4, length -= 4;
  53.           while(memcmp(source, "-->", 3)) source++, length--;
  54.           source += 3, length -= 3;
  55.           continue;
  56.         }
  57.  
  58.         //CDATA
  59.         if(!memcmp(source, "<![CDATA[", 9)) {
  60.           source += 9, length -= 9;
  61.           while(memcmp(source, "]]>", 3)) *output++ = *source++, length--;
  62.           source += 3, length -= 3;
  63.           continue;
  64.         }
  65.       }
  66.  
  67.       *output++ = *source++, length--;
  68.     }
  69.     *output = 0;
  70.   }
  71.  
  72.   inline bool parseExpression(const char *&p) {
  73.     if(*(p + 1) != '!') return false;
  74.  
  75.     //comment
  76.     if(!memcmp(p, "<!--", 4)) {
  77.       while(*p && memcmp(p, "-->", 3)) p++;
  78.       if(!*p) throw "unclosed comment";
  79.       p += 3;
  80.       return true;
  81.     }
  82.  
  83.     //CDATA
  84.     if(!memcmp(p, "<![CDATA[", 9)) {
  85.       while(*p && memcmp(p, "]]>", 3)) p++;
  86.       if(!*p) throw "unclosed CDATA";
  87.       p += 3;
  88.       return true;
  89.     }
  90.  
  91.     //DOCTYPE
  92.     if(!memcmp(p, "<!DOCTYPE", 9)) {
  93.       unsigned counter = 0;
  94.       do {
  95.         char n = *p++;
  96.         if(!n) throw "unclosed DOCTYPE";
  97.         if(n == '<') counter++;
  98.         if(n == '>') counter--;
  99.       } while(counter);
  100.       return true;
  101.     }
  102.  
  103.     return false;
  104.   }
  105.  
  106.   //returns true if tag closes itself (<tag/>); false if not (<tag>)
  107.   inline bool parseHead(const char *&p) {
  108.     //parse name
  109.     const char *nameStart = ++p;  //skip '<'
  110.     while(isName(*p)) p++;
  111.     const char *nameEnd = p;
  112.     copy(name, nameStart, nameEnd - nameStart);
  113.     if(name.empty()) throw "missing element name";
  114.  
  115.     //parse attributes
  116.     while(*p) {
  117.       while(isWhitespace(*p)) p++;
  118.       if(!*p) throw "unclosed attribute";
  119.       if(*p == '?' || *p == '/' || *p == '>') break;
  120.  
  121.       //parse attribute name
  122.       Node attribute;
  123.       attribute.attribute = true;
  124.  
  125.       const char *nameStart = p;
  126.       while(isName(*p)) p++;
  127.       const char *nameEnd = p;
  128.       copy(attribute.name, nameStart, nameEnd - nameStart);
  129.       if(attribute.name.empty()) throw "missing attribute name";
  130.  
  131.       //parse attribute data
  132.       if(*p++ != '=') throw "missing attribute value";
  133.       char terminal = *p++;
  134.       if(terminal != '\'' && terminal != '\"') throw "attribute value not quoted";
  135.       const char *dataStart = p;
  136.       while(*p && *p != terminal) p++;
  137.       if(!*p) throw "missing attribute data terminal";
  138.       const char *dataEnd = p++;  //skip closing terminal
  139.  
  140.       copy(attribute.data, dataStart, dataEnd - dataStart);
  141.       children.append(attribute);
  142.     }
  143.  
  144.     //parse closure
  145.     if(*p == '?' && *(p + 1) == '>') { p += 2; return true; }
  146.     if(*p == '/' && *(p + 1) == '>') { p += 2; return true; }
  147.     if(*p == '>') { p += 1; return false; }
  148.     throw "invalid element tag";
  149.   }
  150.  
  151.   //parse element and all of its child elements
  152.   inline void parseElement(const char *&p) {
  153.     Node node;
  154.     if(node.parseHead(p) == false) node.parse(p);
  155.     children.append(node);
  156.   }
  157.  
  158.   //return true if </tag> matches this node's name
  159.   inline bool parseClosureElement(const char *&p) {
  160.     if(p[0] != '<' || p[1] != '/') return false;
  161.     p += 2;
  162.     const char *nameStart = p;
  163.     while(*p && *p != '>') p++;
  164.     if(*p != '>') throw "unclosed closure element";
  165.     const char *nameEnd = p++;
  166.     if(memcmp(name, nameStart, nameEnd - nameStart)) throw "closure element name mismatch";
  167.     return true;
  168.   }
  169.  
  170.   //parse contents of an element
  171.   inline void parse(const char *&p) {
  172.     const char *dataStart = p, *dataEnd = p;
  173.  
  174.     while(*p) {
  175.       while(*p && *p != '<') p++;
  176.       if(!*p) break;
  177.       dataEnd = p;
  178.       if(parseClosureElement(p) == true) break;
  179.       if(parseExpression(p) == true) continue;
  180.       parseElement(p);
  181.     }
  182.  
  183.     copy(data, dataStart, dataEnd - dataStart);
  184.   }
  185. };
  186.  
  187. struct Document : Node {
  188.   string error;
  189.  
  190.   inline bool load(const char *document) {
  191.     if(document == nullptr) return false;
  192.     reset();
  193.     try {
  194.       parse(document);
  195.     } catch(const char *error) {
  196.       reset();
  197.       this->error = error;
  198.       return false;
  199.     }
  200.     return true;
  201.   }
  202.  
  203.   inline Document() {}
  204.   inline Document(const char *document) { load(document); }
  205. };
clone this paste RAW Paste Data