Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
namespace XmlParser
{
    using System.Collections.Generic;
    using System.Linq;
    using System.Text.RegularExpressions;

    /// <summary>
    /// Runs a single regular expression over a piece of text and flattens the match into a
    /// list of (group name, capture) pairs ordered by their position in the input.
    /// </summary>
    public class XmlParser
    {
        // Options applied to patterns given as strings. The patterns are written with
        // insignificant whitespace and rely on named groups only.
        private const RegexOptions PatternOptions =
            RegexOptions.Compiled
            | RegexOptions.ExplicitCapture
            | RegexOptions.IgnorePatternWhitespace
            | RegexOptions.Singleline;

        /// <summary>The regular expression used by <see cref="ParseXml"/>.</summary>
        public readonly Regex Regex;

        /// <summary>All group names of <see cref="Regex"/> except the whole-match group "0".</summary>
        public readonly string[] RegexGroupNames;

        /// <summary>Compiles <paramref name="pattern"/> and builds a parser around it.</summary>
        /// <param name="pattern">Regular expression pattern text.</param>
        public XmlParser(string pattern)
            : this(new Regex(pattern, PatternOptions))
        {
        }

        /// <summary>Builds a parser around an already constructed regular expression.</summary>
        /// <param name="regex">The expression to use; its options are taken as they are.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="regex"/> is null.</exception>
        protected XmlParser(Regex regex)
        {
            if (regex == null)
            {
                // Fail with a descriptive exception instead of a NullReferenceException below.
                throw new System.ArgumentNullException("regex");
            }

            Regex = regex;
            RegexGroupNames = regex.GetGroupNames().Where(p => p != "0").ToArray();
        }

        /// <summary>Wraps <paramref name="regex"/> in a parser.</summary>
        public static implicit operator XmlParser(Regex regex)
        {
            return new XmlParser(regex);
        }

        /// <summary>
        /// Matches <paramref name="xml"/> once and returns every capture of every named group.
        /// </summary>
        /// <param name="xml">The text to match.</param>
        /// <returns>
        /// An empty list when the expression does not match; otherwise a read-only list of the
        /// captures sorted by start index. The sort is stable, so captures starting at the same
        /// index keep the order of <see cref="RegexGroupNames"/>.
        /// </returns>
        public IList<NamedCapture> ParseXml(string xml)
        {
            Match match = Regex.Match(xml);
            if (!match.Success)
            {
                return new NamedCapture[0];
            }

            return RegexGroupNames
                .SelectMany(
                    name => match.Groups[name].Captures.Cast<Capture>(),
                    (name, capture) => new NamedCapture(name, capture))
                .OrderBy(p => p.Capture.Index)
                .ToList()
                .AsReadOnly();
        }

        /// <summary>
        /// Tells whether a capture list produced by <see cref="ParseXml"/> denotes a failure.
        /// </summary>
        /// <param name="captures">The capture list to inspect.</param>
        /// <returns>
        /// True when the list is empty, when its last entry is null, or when its last entry
        /// belongs to the group named "ERROR"; false otherwise.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="captures"/> is null.</exception>
        public static bool IsError(IList<NamedCapture> captures)
        {
            if (captures == null)
            {
                throw new System.ArgumentNullException("captures");
            }

            if (captures.Count == 0)
            {
                return true;
            }

            NamedCapture last = captures[captures.Count - 1];
            return last == null || last.Name == "ERROR";
        }
    }
}
- //#define XML11
- /* Based on (and with quotes from):
- *
- * Extensible Markup Language (XML) 1.0 (Fifth Edition)
- * W3C Recommendation 26 November 2008
- * http://www.w3.org/TR/2008/REC-xml-20081126/
- * Copyright © 2008 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C liability, trademark and document use rules apply.
- *
- * and
- *
- * Extensible Markup Language (XML) 1.1 (Second Edition)
- * W3C Recommendation 16 August 2006, edited in place 29 September 2006
- * http://www.w3.org/TR/2006/REC-xml11-20060816
- * Copyright © 2006 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C liability, trademark and document use rules apply.
- */
namespace XmlParser
{
    using System;

    /// <summary>
    /// Character-level regular-expression fragments for the XML 1.0 / 1.1 grammar.
    /// </summary>
    /// <remarks>
    /// Character classes are written with spaces for readability and the spaces are then
    /// stripped with <c>Replace(" ", String.Empty)</c>, because whitespace inside a character
    /// class is significant even when the pattern ignores pattern whitespace.
    /// Static field initializers run in textual order, so the declaration order matters.
    /// </remarks>
    public static class XmlChars
    {
        // Any UTF-16 surrogate pair: U+10000 - U+10FFFF.
        private static readonly string pair = @"[\uD800-\uDBFF][\uDC00-\uDFFF]";

        // Surrogate pairs restricted to U+10000 - U+EFFFF, the supplementary range allowed by
        // production [4]. U+EFFFF is encoded as \uDB7F\uDFFF, so the high surrogate stops at DB7F.
        private static readonly string namePair = @"[\uD800-\uDB7F][\uDC00-\uDFFF]";

        /* Character Range */
#if !(XML11)
        private static readonly string charSingle = @"[\x09 \x0A \x0D \x20-\uD7FF \uE000-\uFFFD]".Replace(" ", String.Empty); // 2 of the Xml 1.0
#else
        private static readonly string charSingle = @"[\x09 \x0A \x0D \x20-\x7E \x85 \xA0-\uD7FF \uE000-\uFFFD]".Replace(" ", String.Empty); // 2 and 2a of the Xml 1.1
#endif
        private static readonly string charPair = pair; // 2

        //[2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] (XML 1.0)
        //[2] Char ::= [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] (XML 1.1)
        //[2a] RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F] (XML 1.1)
        /// <summary>One XML character: a single UTF-16 unit from the allowed ranges or a surrogate pair.</summary>
        /// <remarks>
        /// Other code derives restricted classes from this string by replacing the text
        /// "] | [" between the two alternatives, so the " | " separator must stay as it is.
        /// </remarks>
        public static readonly string Char = "( " + charSingle + " | " + charPair + " )";

        /* White Space */
        //[3] S ::= (#x20 | #x9 | #xD | #xA)+
        /// <summary>One or more XML white-space characters.</summary>
        public static readonly string Space = @"( [\x20 \x09 \x0D \x0A]+ )".Replace(" ", String.Empty);

        /* Names and Tokens */
        private static readonly string nameStartCharSingle = @"[: A-Z _ a-z \xC0-\xD6 \xD8-\xF6 \xF8-\u02FF \u0370-\u037D \u037F-\u1FFF \u200C-\u200D \u2070-\u218F \u2C00-\u2FEF \u3001-\uD7FF \uF900-\uFDCF \uFDF0-\uFFFD]".Replace(" ", String.Empty); // 4
        private static readonly string nameStartCharPair = namePair; // 4

        //[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
        /// <summary>A character that may start a name.</summary>
        public static readonly string NameStartChar = "( " + nameStartCharSingle + " | " + nameStartCharPair + " )";

        //[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
        /// <summary>A character that may appear anywhere in a name.</summary>
        /// <remarks>
        /// Built by widening the single-unit class of <see cref="NameStartChar"/>: the only ":"
        /// in that string is the first class member, and it is replaced by ":" plus the extra ranges.
        /// </remarks>
        public static readonly string NameChar = NameStartChar.Replace(":", @": \- . 0-9 \xB7 \u0300-\u036F \u203F-\u2040".Replace(" ", String.Empty));

        /* Literals */
        //[13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        /// <summary>A character allowed in a public identifier literal.</summary>
        public static readonly string PubidChar = @"( [\x20 \x0D \x0A a-zA-Z0-9 \-'()+,./:=?;!*#@$_%] )".Replace(" ", String.Empty);

        /// <summary><see cref="PubidChar"/> without the apostrophe.</summary>
        public static readonly string PubidCharLessQuote = PubidChar.Replace("'", String.Empty); // Used by 12

        /* Attribute Type */
        //[55] StringType ::= 'CDATA'
        /// <summary>The string attribute type keyword.</summary>
        public static readonly string StringType = "( CDATA )";

        //[56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
        /// <summary>The tokenized attribute type keywords.</summary>
        /// <remarks>Longer keywords come before their prefixes so the alternation tries them first.</remarks>
        public static readonly string TokenizedType = "( IDREFS|IDREF|ID|ENTITIES|ENTITY|NMTOKENS|NMTOKEN )";
    }
}
- /* Based on (and with quotes from):
- *
- * Extensible Markup Language (XML) 1.0 (Fifth Edition)
- * W3C Recommendation 26 November 2008
- * http://www.w3.org/TR/2008/REC-xml-20081126/
- * Copyright © 2008 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C liability, trademark and document use rules apply.
- *
- * and
- *
- * Extensible Markup Language (XML) 1.1 (Second Edition)
- * W3C Recommendation 16 August 2006, edited in place 29 September 2006
- * http://www.w3.org/TR/2006/REC-xml11-20060816
- * Copyright © 2006 W3C® (MIT, ERCIM, Keio), All Rights Reserved. W3C liability, trademark and document use rules apply.
- */
namespace XmlParser
{
    /// <summary>
    /// Regular-expression fragments for the productions of the XML grammar, composed from the
    /// character classes in <see cref="XmlChars"/>. The bracketed numbers in the comments are
    /// the production numbers of the XML recommendation.
    /// </summary>
    /// <remarks>
    /// The fragments contain insignificant spaces and therefore have to be compiled with
    /// RegexOptions.IgnorePatternWhitespace. Many fragments record a failure by making an empty
    /// capture in the group named ERROR; later parts are wrapped in (?(ERROR)| ... ) so that
    /// they are skipped once ERROR has been set.
    /// Static field initializers run in textual order, so the declaration order matters.
    /// </remarks>
    public static class XmlTypes
    {
        private static readonly string TaggedSpace = @"(?<SPACE> " + XmlChars.Space + @" )";
        private static readonly string OptionalTaggedSpace = TaggedSpace + @"?";

        /* Names and Tokens */
        /// <summary>A name without a capturing group around it.</summary>
        public static readonly string UntaggedName = @"( " + XmlChars.NameStartChar + XmlChars.NameChar + @"* )";

        //[5] Name ::= NameStartChar (NameChar)*
        /// <summary>A name captured in the group NAME.</summary>
        public static readonly string Name = TaggedName("NAME");

        //[6] Names ::= Name (#x20 Name)*
        //public static readonly string Names =
        // @"(?<NAMES>" + Name +
        // @" (" +
        // @" (?<SPACE> \x20+ )" + Name +
        // @" )*" +
        // @")"; // Added + after \x20

        /// <summary>A name token without a capturing group around it.</summary>
        public static readonly string UntaggedNmToken = @"( " + XmlChars.NameChar + @"+ )";

        //[7] Nmtoken ::= (NameChar)+
        /// <summary>A name token captured in the group NMTOKEN.</summary>
        public static readonly string NmToken = TaggedNmToken("NMTOKEN");

        //[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
        //public static readonly string NmTokens =
        // @"(?<NMTOKENS>" + NmToken +
        // @" (" +
        // @" (?<SPACE> \x20+ )" + NmToken +
        // @" )*" +
        // @")"; // Added + after \x20

        /* Character Reference */
        //[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
        /// <summary>A character reference.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string CharRef =
            @"(" +
            @" (?<CHARREFOPEN> & \# )" +
            @" ( (?<CHARREFVALUE> [0-9]+ | x [0-9A-Fa-f]+ ) | (?<ERROR>) )" +
            @" (?(ERROR)| ( (?<CHARREFCLOSE> ; ) | (?<ERROR>) ) )" +
            @")";

        /* Entity Reference */
        //[68] EntityRef ::= '&' Name ';'
        /// <summary>An entity reference.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string EntityRef =
            @"(" +
            @" (?<ENTITYREFOPEN> & )" +
            @" ( " + TaggedName("ENTITYREFNAME") + @" | (?<ERROR>) )" +
            @" (?(ERROR)| ( (?<ENTITYREFCLOSE> ; ) | (?<ERROR>) ) )" +
            @")";

        //[67] Reference ::= EntityRef | CharRef
        /// <summary>A character reference or an entity reference; the character reference is tried first.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string Reference = @"( " + CharRef + @" | " + EntityRef + @" )";

        //[69] PEReference ::= '%' Name ';'
        /// <summary>A parameter-entity reference.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string PEReference =
            @"(" +
            @" (?<PEREFOPEN> % )" +
            @" ( " + TaggedName("PEREFNAME") + @" | (?<ERROR>) )" +
            @" (?(ERROR)| ( (?<PEREFCLOSE> ; ) | (?<ERROR>) ) )" +
            @")";

        /* Literals */
        // Char minus %, & and '. Character-class subtraction is spliced into the single-unit
        // class of XmlChars.Char by rewriting the "] | [" that separates its two alternatives.
        private static readonly string charLessPercentAmpQuote = XmlChars.Char.Replace("] | [", "-[%&']] | [");

        // Entity value delimited by apostrophes. Can set the ERROR group.
        private static readonly string entityValueSingleQuotes =
            @"(" +
            @" (?<ENTITYVALUEQUOTEOPEN> ' )" +
            @" (?(ERROR)| " +
            @" ( (?<TEXT> " + charLessPercentAmpQuote + @"+ ) | " + PEReference + @" | " + Reference + @" )" +
            @" )*" +
            @" (?(ERROR)| ( (?<ENTITYVALUEQUOTECLOSE> ' ) | (?<ERROR>) ) )" +
            @")";

        // Same pattern with every apostrophe (delimiters and the excluded character) turned
        // into a double quote. Can set the ERROR group.
        private static readonly string entityValueDoubleQuotes = entityValueSingleQuotes.Replace("'", "\"");

        //[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"
        /// <summary>A quoted entity value.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string EntityValue = @"( " + entityValueSingleQuotes + @" | " + entityValueDoubleQuotes + @" )";

        // Char minus <, & and '.
        private static readonly string charLessLtAmpQuote = XmlChars.Char.Replace("] | [", "-[<&']] | [");

        // Attribute value delimited by apostrophes. Can set the ERROR group.
        private static readonly string attValueSingleQuotes =
            @"(" +
            @" (?<ATTRVALUEQUOTEOPEN> ' )" +
            @" (?(ERROR)| " +
            @" ( (?<TEXT> " + charLessLtAmpQuote + @"+ ) | " + Reference + @" )" +
            @" )*" +
            @" (?(ERROR)| ( (?<ATTRVALUEQUOTECLOSE> ' ) | (?<ERROR>) ) )" +
            @")";

        // Double-quote variant derived by replacing every apostrophe. Can set the ERROR group.
        private static readonly string attValueDoubleQuotes = attValueSingleQuotes.Replace("'", "\"");

        //[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
        /// <summary>A quoted attribute value.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string AttValue = @"( " + attValueSingleQuotes + @" | " + attValueDoubleQuotes + @" )";

        // Char minus '.
        private static readonly string charLessQuote = XmlChars.Char.Replace("] | [", "-[']] | [");

        // System literal delimited by apostrophes. Can set the ERROR group.
        private static readonly string systemLiteralSingleQuotes = @"( (?<SYSTEMLITERALQUOTEOPEN> ' ) (?<SYSTEMLITERAL> " + charLessQuote + @"+ )? ( (?<SYSTEMLITERALQUOTECLOSE> ' ) | (?<ERROR>) ) )";

        // Double-quote variant derived by replacing every apostrophe. Can set the ERROR group.
        private static readonly string systemLiteralDoubleQuotes = systemLiteralSingleQuotes.Replace("'", "\"");

        //[11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
        /// <summary>A quoted system literal.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string SystemLiteral = @"( " + systemLiteralSingleQuotes + @" | " + systemLiteralDoubleQuotes + @" )";

        //[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
        /// <summary>A quoted public identifier literal.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string PubidLiteral =
            @"(" +
            " (?<PUBIDLITERALQUOTEOPEN> \" ) (?<PUBID>" + XmlChars.PubidChar + "+ )? ( (?<PUBIDLITERALQUOTECLOSE> \" ) | (?<ERROR>) )" +
            @" | " +
            @" (?<PUBIDLITERALQUOTEOPEN> ' ) (?<PUBID>" + XmlChars.PubidCharLessQuote + @"+ )? ( (?<PUBIDLITERALQUOTECLOSE> \' ) | (?<ERROR>) )" +
            @")";

        /* Character Data */
        // Char minus < and &.
        private static readonly string charLessLtAmp = XmlChars.Char.Replace("] | [", "-[<&]] | [");

        //[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) // (XML 1.0)
        //[14] CharData ::= (Char* - (Char* ']]>' Char*)) // (XML 1.1)
        /// <summary>
        /// A non-empty run of character data, captured in the group CDATA. The negative
        /// lookahead stops the run in front of "]]&gt;".
        /// </summary>
        public static readonly string CharData =
            @"(" +
            @" (?<CDATA>" +
            @" ( (?!\]\]> ) " + charLessLtAmp + @" )+" +
            @" )" +
            @")";

        /* Comments */
        // Char minus -.
        private static readonly string charLessMinus = XmlChars.Char.Replace("] | [", "-[-]] | [");

        //[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
        /// <summary>A comment.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string Comment =
            @"(" +
            @" (?<COMMENTOPEN> <!-- )" +
            @" (?<COMMENT>" +
            @" (" +
            @" " + charLessMinus + @" | " + @" - " + charLessMinus +
            @" )+" +
            @" )?" +
            @" ( (?<COMMENTCLOSE> --> ) | (?<ERROR>) )" +
            @")";

        /* Processing Instructions */
        //[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
        /// <summary>A processing-instruction target, captured in the group PINAME.</summary>
        /// <remarks>Up to the caller to check for [Xx][Mm][Ll].</remarks>
        public static readonly string PITarget = TaggedName("PINAME");

        //[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
        /// <summary>
        /// A processing instruction. A negative lookahead rejects a target that is exactly
        /// "xml" in any letter case.
        /// </summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string PI =
            @"(" +
            @" (?<PIOPEN> <\? )" +
            @" (?! [Xx][Mm][Ll] ( " + XmlChars.Space + @" | \?> ) )" +
            @" " + PITarget +
            @" (" +
            @" " + TaggedSpace +
            @" (?<OTHER> ( (?! \?> )" + XmlChars.Char + @" )+ )?" +
            @" )?" +
            @" ( (?<PICLOSE> \?> ) | (?<ERROR>) )" +
            @")";

        /* CDATA Sections */
        //[19] CDStart ::= '<![CDATA['
        /// <summary>The opening delimiter of a CDATA section.</summary>
        public static readonly string CDStart = @"(?<CDATAOPEN> <!\[CDATA\[ )";

        //[20] CData ::= (Char* - (Char* ']]>' Char*))
        /// <summary>The optional content of a CDATA section, captured in the group CDATA.</summary>
        public static readonly string CData =
            @"(?<CDATA>" +
            @" (" +
            @" (?! ]]> )" + XmlChars.Char +
            @" )+" +
            @")?";

        //[21] CDEnd ::= ']]>'
        /// <summary>The closing delimiter of a CDATA section.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string CDEnd = @"( (?<CDATACLOSE> ]]> ) | (?<ERROR>) )";

        //[18] CDSect ::= CDStart CData CDEnd
        /// <summary>A complete CDATA section.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string CDSect = @"( " + CDStart + CData + CDEnd + @" )";

        /* Prolog (part 1) */
        //[25] Eq ::= S? '=' S?
        /// <summary>An equals sign with optional surrounding white space.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string Eq =
            @"( " + OptionalTaggedSpace +
            @" (" +
            @" (?<EQUAL> = )" + OptionalTaggedSpace + @" | (?<ERROR>)" +
            @" )" +
            @")";

        /* Encoding Declaration */
        //[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
        /// <summary>An encoding name, captured in the group ENCODING.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string EncName = @"( (?<ENCODING> [A-Za-z][A-Za-z0-9._\-]* ) | (?<ERROR>) )";

        // Encoding name delimited by apostrophes. Can set the ERROR group.
        private static readonly string encodingDeclSingleQuotes =
            @"(" +
            @" (?<ENCODINGDECLQUOTEOPEN> ' )" + EncName +
            @" (?(ERROR)| ( (?<ENCODINGDECLQUOTECLOSE> ' ) | (?<ERROR>) ) )" +
            @")";

        // Double-quote variant derived by replacing every apostrophe. Can set the ERROR group.
        private static readonly string encodingDeclDoubleQuotes = encodingDeclSingleQuotes.Replace("'", "\"");

        //[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
        /// <summary>The encoding pseudo-attribute of an XML declaration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string EncodingDecl =
            @"( " + TaggedSpace +
            @" (?<XMLDECLATTRNAME> encoding )" + Eq +
            @" (?(ERROR)| ( " + encodingDeclSingleQuotes + @" | " + encodingDeclDoubleQuotes + @" | (?<ERROR>) ) )" +
            @")";

        /* Standalone Document Declaration */
        // yes/no value delimited by apostrophes. Can set the ERROR group.
        private static readonly string sdDeclSingleQuotes =
            @"(" +
            @" (?<SDDECLQUOTEOPEN> ' )" + @"( (?<STANDALONE> yes | no ) | (?<ERROR>) )" +
            @" (?(ERROR)| ( (?<SDDECLQUOTECLOSE> ' ) | (?<ERROR>) ) )" +
            @")";

        // Double-quote variant derived by replacing every apostrophe. Can set the ERROR group.
        private static readonly string sdDeclDoubleQuotes = sdDeclSingleQuotes.Replace("'", "\"");

        //[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
        /// <summary>The standalone pseudo-attribute of an XML declaration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string SDDecl =
            @"( " + TaggedSpace +
            @" (?<XMLDECLATTRNAME> standalone )" + Eq +
            @" (?(ERROR)| ( " + sdDeclSingleQuotes + @" | " + sdDeclDoubleQuotes + @" | (?<ERROR>) ) )" +
            @")";

        /* Prolog (part 2) */
        //[26] VersionNum ::= '1.[0-9]+'
        /// <summary>A version number, captured in the group VERSION.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string VersionNum = @"( (?<VERSION> 1\.[0-9]+ ) | (?<ERROR>) )";

        //[27] Misc ::= Comment | PI | S
        /// <summary>A comment, a processing instruction or white space.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string Misc = @"( " + Comment + @" | " + PI + @" | " + TaggedSpace + @" )";

        // Version number delimited by apostrophes. Can set the ERROR group.
        private static readonly string versionNumSingleQuotes =
            @"(" +
            @" (?<VERSIONNUMQUOTEOPEN> ' )" + VersionNum +
            @" (?(ERROR)| ( (?<VERSIONNUMQUOTECLOSE> ' ) | (?<ERROR>) ) )" +
            @")";

        // Double-quote variant derived by replacing every apostrophe. Can set the ERROR group.
        private static readonly string versionNumDoubleQuotes = versionNumSingleQuotes.Replace("'", "\"");

        //[24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
        /// <summary>The version pseudo-attribute of an XML declaration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string VersionInfo =
            @"(" + TaggedSpace +
            @" (?<XMLDECLATTRNAME> version )" + Eq +
            @" (?(ERROR)| (" + versionNumSingleQuotes + @" | " + versionNumDoubleQuotes + @" | (?<ERROR>) ) )" +
            @")";

        //[23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
        /// <summary>The XML declaration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string XmlDecl =
            @"(" +
            @" (?<XMLDECLOPEN> <\?xml )" + VersionInfo +
            @" (?(ERROR)| " + EncodingDecl + @"? )" +
            @" (?(ERROR)| " + SDDecl + @"? )" +
            @" (?(ERROR)| " + OptionalTaggedSpace +
            @" ( (?<XMLDECLCLOSE> \?> ) | (?<ERROR>) )" +
            @" )" +
            @")";

        /* Element-content Models */
        // An opening bracket; pushes one capture onto each of the helper stacks _DTDCPDEPTH
        // and _DTDCPCURRENTSIGN.
        private static readonly string cpOpen = @"(?<DTDCPBRACKETOPEN> \( (?<_DTDCPDEPTH>) (?<_DTDCPCURRENTSIGN>) )";

        //[48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
        /// <summary>
        /// A bracketed content model (choice or sequence, possibly nested), matched with
        /// balancing groups instead of recursion.
        /// </summary>
        /// <remarks>
        /// Can set the ERROR group. Helper stacks, all emptied at the end of the fragment:
        /// _DTDCPDEPTH gets one capture per "(" and loses one per ")"; the loop runs while it is
        /// non-empty. _DTDSTATENEEDCP is pushed where a content particle is expected and popped
        /// once a name or parameter-entity reference has been matched. _DTDCPCURRENTSIGN is
        /// pushed and popped together with the brackets and is re-captured around a connector
        /// ("|" or ","), which later connectors are compared against via a backreference.
        /// </remarks>
        public static readonly string CP =
            @"(" +
            @" " + cpOpen +
            @" (?<_DTDSTATENEEDCP>)" +
            @" (?(ERROR)| (?(_DTDCPDEPTH)" +
            @" (" +
            @" " + TaggedSpace +
            @" | " +
            @" (?(_DTDSTATENEEDCP)" +
            @" (" +
            @" " + cpOpen +
            @" | " +
            @" (" + TaggedName("DTDELEMENTCHILDNAME") + @" | " + PEReference + @" )" +
            @" (?(ERROR)| (?<DTDCONTENTQUANTITY> [?*+] )? (?<-_DTDSTATENEEDCP>) )" +
            @" | " +
            @" (?<ERROR>)" +
            @" )" +
            @" | " +
            @" (" +
            @" (?<DTDCPBRACKETCLOSE> \) (?<-_DTDCPDEPTH>) (?<-_DTDCPCURRENTSIGN>) )" +
            @" (?<DTDCONTENTQUANTITY> [?*+] )?" +
            @" | " +
            @" (?= [|,] )" +
            @" (" +
            @" (?<= \k<_DTDCPCURRENTSIGN> ) (?<-_DTDCPCURRENTSIGN>) (?<_DTDCPCURRENTSIGN>(?<DTDCPCONNECTOR> [|,] ) )" +
            @" | " +
            @" (?<DTDCPCONNECTOR> \k<_DTDCPCURRENTSIGN> )" +
            @" )" +
            @" (?<_DTDSTATENEEDCP>)" +
            @" | " +
            @" (?<ERROR>)" +
            @" )" +
            @" )" +
            @" ) )" +
            @" )*" +
            @" (?(ERROR) | (?(_DTDCPDEPTH) (?<ERROR>) ) )" +
            @" (?<-_DTDSTATENEEDCP>)* (?<-_DTDCPDEPTH>)* (?<-_DTDCPCURRENTSIGN>)*" +
            @")";

        //[47] children ::= (choice | seq) ('?' | '*' | '+')?
        //public static readonly string Children;
        //[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
        //public static readonly string Choice;
        //[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
        //public static readonly string Seq;

        /* Start-tag (part 1) */
        //[41] Attribute ::= Name Eq AttValue
        /// <summary>An attribute: name (group ATTRNAME), equals sign and quoted value.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string Attribute = @"( " + TaggedName("ATTRNAME") + Eq + @"(?(ERROR)| " + AttValue + @" ) )";

        /* Tags for Empty Elements */
        //[44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
        //public static readonly string EmptyElemTag = Attribute;

        /* Start-tag (part 2) */
        //[40] STag ::= '<' Name (S Attribute)* S? '>'
        /// <summary>A start tag or an empty-element tag.</summary>
        /// <remarks>
        /// Can set the ERROR group. The opening bracket and the name are captured under both
        /// the start-tag and the empty-element-tag group names; whichever closing form matches
        /// ("/&gt;" or "&gt;") pops the captures made for the other form. The name is also
        /// pushed onto the helper stack _ELDEPTH, and "/&gt;" pops it again.
        /// </remarks>
        public static readonly string STag =
            @"(" +
            @" (?<STARTELEMENTTAGOPEN> (?<STARTEMPTYELEMENTTAGOPEN> < ) )" +
            @" (?<_ELDEPTH> (?<EMPTYELEMENTNAME>" + TaggedName("ELEMENTNAME") + @" ) )" +
            @" (?(ERROR)| " + TaggedSpace + Attribute + @" )*" +
            @" (?(ERROR)| " + OptionalTaggedSpace +
            @" (" +
            @" (?<STARTEMPTYELEMENTTAGCLOSE> /> (?<-STARTELEMENTTAGOPEN>) (?<-_ELDEPTH>) (?<-ELEMENTNAME>) )" +
            @" | " +
            @" (?<STARTELEMENTTAGCLOSE> > (?<-STARTEMPTYELEMENTTAGOPEN>) (?<-EMPTYELEMENTNAME>) )" +
            @" | " +
            @" (?<ERROR>)" +
            @" )" +
            @" )" +
            @")";

        /* End-tag */
        //[42] ETag ::= '</' Name S? '>'
        //public static readonly string ETag;

        /* Content of Elements */
        //[43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
        //public static readonly string Content = Element + Reference + CDSect + "";

        /* Element */
        //[39] element ::= EmptyElemTag | STag content ETag
        /// <summary>An element with all of its content, matched with balancing groups instead of recursion.</summary>
        /// <remarks>
        /// Can set the ERROR group. The loop consumes content while the helper stack _ELDEPTH is
        /// non-empty. An end tag has to repeat the name on top of that stack (backreference
        /// \k&lt;_ELDEPTH&gt;) and then pops it. If names are still on the stack when the loop
        /// ends, ERROR is set; the stack is emptied afterwards.
        /// </remarks>
        public static readonly string Element =
            @"(" +
            @" (" + STag + @" | (?<ERROR>) )" +
            @" (?(ERROR)| (?(_ELDEPTH)" +
            @" (" +
            @" " + CharData +
            @" | " +
            @" " + Comment +
            @" | " +
            @" " + STag +
            @" | " +
            @" " + Reference +
            @" | " +
            @" " + CDSect +
            @" | " +
            @" " + PI +
            @" | " +
            @" (" +
            @" (?<ENDELEMENTTAGOPEN> </ )" +
            @" ( (?<ENDTAGGEDNAME>\k<_ELDEPTH> (?<-_ELDEPTH>) ) | (?<ERROR>) )" +
            @" (?(ERROR)| " + OptionalTaggedSpace +
            @" (?<ENDELEMENTTAGCLOSE> > )" +
            @" )" +
            @" )" +
            @" )" +
            @" ) )*" +
            @" (?(ERROR)| (?(_ELDEPTH) (?<ERROR>) ) )" +
            @" (?<-_ELDEPTH>)*" +
            @")";

        /* Mixed-content Declaration */
        //[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
        /// <summary>A mixed-content declaration.</summary>
        /// <remarks>
        /// Can set the ERROR group. The helper group _DTDCURRENTCPCONNECTOR is set by the first
        /// "|"; when it is set the trailing "*" is mandatory, otherwise it is optional.
        /// </remarks>
        public static readonly string Mixed =
            @"(" +
            @" (?<DTDMIXEDBRACKETOPEN> \( )" + OptionalTaggedSpace + @"(?<DTDCONTENTTYPE> \#PCDATA )" +
            @" (?(ERROR)| (" + OptionalTaggedSpace +
            @" (?<DTDCPCONNECTOR> \| (?(_DTDCURRENTCPCONNECTOR)| (?<_DTDCURRENTCPCONNECTOR>) ) )" + OptionalTaggedSpace + TaggedName("DTDELEMENTCHILDNAME") +
            @" ) )*" + OptionalTaggedSpace +
            @" ( (?<DTDMIXEDBRACKETCLOSE> \) ) | (?<ERROR>) )" +
            @" (?(ERROR)| (?(_DTDCURRENTCPCONNECTOR) (?<-_DTDCURRENTCPCONNECTOR>) ( (?<DTDCONTENTQUANTITY> \* ) | (?<ERROR>) ) | (?<DTDCONTENTQUANTITY> \* )? ) )" +
            @" (?<-_DTDCURRENTCPCONNECTOR>)*" +
            @")";

        /* Element Type Declaration */
        //[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
        /// <summary>The content specification of an element declaration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        // NOTE(review): EMPTY/ANY are captured as CONTENTTYPE while Mixed uses DTDCONTENTTYPE;
        // possibly an oversight, left unchanged because consumers may rely on the name.
        public static readonly string ContentSpec =
            @"(" +
            @" (?<CONTENTTYPE> EMPTY | ANY)" +
            @" | " +
            Mixed +
            @" | " +
            CP +
            @")";

        //[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
        /// <summary>An element type declaration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string ElementDecl =
            @"(" +
            @" (?<DTDELEMENTDECLBRACKETOPEN> <! )" +
            @" (?<DTDNAME> ELEMENT )" + TaggedSpace +
            @" ( " + TaggedName("DTDELEMENTNAME") + @" | (?<ERROR>) )" +
            @" (?(ERROR)| " + @"( " + TaggedSpace + @" | (?<ERROR>) ) )" +
            @" (?(ERROR)| " + @"( " + ContentSpec + @" | (?<ERROR>) ) )" +
            @" (?(ERROR)| " + OptionalTaggedSpace +
            @" ( (?<DTDELEMENTDECLBRACKETCLOSE> > ) | (?<ERROR>) )" +
            @" )" +
            @")";

        /* Enumerated Attribute Types */
        //[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
        /// <summary>
        /// A NOTATION attribute type. Every listed notation name is captured in the group
        /// DTDNOTATIONATTRTYPENAME.
        /// </summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string NotationType =
            @"(" +
            @" (?<DTDATTRTYPE> NOTATION )" + TaggedSpace +
            @" ( (?<DTDNOTATIONATTRTYPEBRACKETOPEN> \( ) | (?<ERROR>) )" +
            @" (?(ERROR)| (" + OptionalTaggedSpace + TaggedName("DTDNOTATIONATTRTYPENAME") + @" | (?<ERROR>) ) )" +
            @" (?(ERROR)| (" + OptionalTaggedSpace +
            // Same group as the first name; these are notation names, not element children.
            @" (?<DTDNOTATIONATTRTYPECONNECTOR> \| )" + OptionalTaggedSpace + TaggedName("DTDNOTATIONATTRTYPENAME") +
            @" ) )*" + OptionalTaggedSpace +
            @" (?(ERROR)| ( (?<DTDNOTATIONATTRTYPEBRACKETCLOSE> \) ) | (?<ERROR>) ) )" +
            @")";

        //[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
        /// <summary>An enumeration attribute type; every token is captured in DTDENUMERATIONNMTOKEN.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string Enumeration =
            @"(" +
            @" (?<DTDENUMERATIONATTRTYPEBRACKETOPEN> \( )" +
            @" (" + OptionalTaggedSpace + TaggedNmToken("DTDENUMERATIONNMTOKEN") + @" | (?<ERROR>) )" +
            @" (?(ERROR)| (" + OptionalTaggedSpace +
            @" (?<DTDENUMERATIONATTRTYPECONNECTOR> \| )" + OptionalTaggedSpace + TaggedNmToken("DTDENUMERATIONNMTOKEN") +
            @" ) )*" + OptionalTaggedSpace +
            @" (?(ERROR)| ( (?<DTDENUMERATIONATTRTYPEBRACKETCLOSE> \) ) | (?<ERROR>) ) )" +
            @")";

        //[57] EnumeratedType ::= NotationType | Enumeration
        /// <summary>A notation type or an enumeration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string EnumeratedType =
            @"(" +
            NotationType +
            @" | " +
            Enumeration +
            @")";

        /* Attribute Types */
        //[54] AttType ::= StringType | TokenizedType | EnumeratedType
        /// <summary>An attribute type; keywords are captured in the group DTDATTRTYPE.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string AttType =
            @"(" +
            @" (?<DTDATTRTYPE> CDATA )" +
            @" | " +
            @" (?<DTDATTRTYPE> IDREFS | IDREF | ID | ENTITIES | ENTITY | NMTOKENS | NMTOKEN )" +
            @" | " +
            EnumeratedType +
            @")";

        /* Attribute Defaults */
        //[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
        /// <summary>The default declaration of an attribute definition.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string DefaultDecl =
            @"(" +
            @" (?<DTDATTRDECLDEFAULT> \#REQUIRED | \#IMPLIED )" +
            @" | " +
            @" ( (?<DTDATTRDECLDEFAULT> \#FIXED )" + TaggedSpace + @" )?" +
            AttValue +
            @")";

        /* Attribute-list Declaration */
        //[53] AttDef ::= S Name S AttType S DefaultDecl
        /// <summary>One attribute definition; the attribute name is captured in DTDATTRLISTNAME.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string AttDef =
            @"(" +
            TaggedSpace +
            TaggedName("DTDATTRLISTNAME") +
            @" (" + TaggedSpace + @" | (?<ERROR>) )" +
            @" (?(ERROR)| (" + AttType + @" | (?<ERROR>) ) )" +
            @" (?(ERROR)| (" + TaggedSpace + @" | (?<ERROR>) ) )" +
            @" (?(ERROR)| (" + DefaultDecl + @" | (?<ERROR>) ) )" +
            @")";

        //[52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
        /// <summary>An attribute-list declaration; the element name is captured in DTDATTLISTNAME.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string AttlistDecl =
            @"(" +
            @" (?<DTDATTRLISTDECLBRACKETOPEN> <! )" +
            @" (?<DTDNAME> ATTLIST )" + TaggedSpace +
            @" ( " + TaggedName("DTDATTLISTNAME") + @" | (?<ERROR>) )" +
            @" (?(ERROR)| " + AttDef + @" )*" +
            @" (?(ERROR)| " + OptionalTaggedSpace +
            @" ( (?<DTDATTRLISTDECLBRACKETCLOSE> > ) | (?<ERROR>) )" +
            @" )" +
            @")";

        /* External Entity Declaration */
        //[75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
        /// <summary>An external identifier; the keyword is captured in DTDIDTYPE.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string ExternalID =
            @"(" +
            @" (" +
            @" (?<DTDIDTYPE> SYSTEM )" + TaggedSpace +
            @" (" + SystemLiteral + @" | (?<ERROR>) )" +
            @" )" +
            @" | " +
            @" (" +
            @" (?<DTDIDTYPE> PUBLIC )" + TaggedSpace +
            @" (" + PubidLiteral + @" | (?<ERROR>) )" +
            @" (?(ERROR)| ( " + TaggedSpace + @" | (?<ERROR>) ) )" +
            @" (?(ERROR)| ( " + SystemLiteral + @" | (?<ERROR>) ) )" +
            @" )" +
            @")";

        //[76] NDataDecl ::= S 'NDATA' S Name
        /// <summary>An NDATA declaration; the notation name is captured in DTDNDATANAME.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string NDataDecl =
            @"(" +
            TaggedSpace +
            @" (?<DTDNDATA> NDATA )" +
            TaggedSpace +
            @" ( " + TaggedName("DTDNDATANAME") + @" | (?<ERROR>) )" +
            @")";

        /* Entity Declaration */
        //[73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
        /// <summary>The definition part of a general entity declaration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string EntityDef =
            @"(" +
            EntityValue +
            @" | " +
            @" (" +
            @" " + ExternalID +
            @" (?(ERROR)| " + NDataDecl + @"? )" +
            @" )" +
            @")";

        //[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
        // Moved the "'<!ENTITY' S" and the "S? '>'" to EntityDecl
        /// <summary>The inner part of a general entity declaration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string GEDecl =
            @"(" +
            TaggedName("DTDENTITYNAME") +
            @" (?(ERROR)| " + @"( " + TaggedSpace + @" | (?<ERROR>) ) )" +
            @" (?(ERROR)| " + @"( " + EntityDef + @" | (?<ERROR>) ) )" +
            @")";

        //[74] PEDef ::= EntityValue | ExternalID
        /// <summary>The definition part of a parameter entity declaration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string PEDef =
            @"(" +
            EntityValue +
            @" | " +
            ExternalID +
            @")";

        //[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
        // Moved the "'<!ENTITY' S" and the "S? '>'" to EntityDecl
        /// <summary>The inner part of a parameter entity declaration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string PEDecl =
            @"(" +
            @" (?<DTDENTITYTYPE> % )" + TaggedSpace +
            @" ( " + TaggedName("DTDENTITYNAME") + @" | (?<ERROR>) )" +
            @" (?(ERROR)| " + @"( " + TaggedSpace + @" | (?<ERROR>) ) )" +
            @" (?(ERROR)| " + @"( " + PEDef + @" | (?<ERROR>) ) )" +
            @")";

        //[70] EntityDecl ::= GEDecl | PEDecl
        /// <summary>An entity declaration, general or parameter.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string EntityDecl =
            @"(" +
            @" (?<DTDENTITYBRACKETOPEN> <! )" +
            @" (?<DTDNAME> ENTITY )" + TaggedSpace +
            @" (" + GEDecl + @" | " + PEDecl + @" | (?<ERROR>) )" +
            @" (?(ERROR)| " + OptionalTaggedSpace +
            @" ( (?<DTDENTITYBRACKETCLOSE> > ) | (?<ERROR>) )" +
            @" )" +
            @")";

        /* Text Declaration */
        //[77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
        //public static readonly string TextDecl;

        /* Notation Declarations */
        //[83] PublicID ::= 'PUBLIC' S PubidLiteral
        /// <summary>A public identifier without a system literal.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string PublicID =
            @"(" +
            @" (?<DTDIDTYPE> PUBLIC )" + TaggedSpace +
            @" (" + PubidLiteral + @" | (?<ERROR>) )" +
            @")";

        //[82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
        /// <summary>A notation declaration.</summary>
        /// <remarks>
        /// Can set the ERROR group. PublicID is tried first and only accepted when the closing
        /// "&gt;" follows (lookahead); otherwise the alternative falls through to ExternalID.
        /// </remarks>
        public static readonly string NotationDecl =
            @"(" +
            @" (?<DTDNOTATIONBRACKETOPEN> <! )" +
            @" (?<DTDNAME> NOTATION )" + TaggedSpace +
            @" ( " + TaggedName("DTDNOTATIONNAME") + @" | (?<ERROR>) )" +
            @" (?(ERROR)| ( " + TaggedSpace + @" | (?<ERROR>) ) )" +
            @" (?(ERROR)| (" +
            @" " + PublicID + @" (?= " + XmlChars.Space + @"? > )" +
            @" | " +
            @" " + ExternalID +
            @" | " +
            @" (?<ERROR>)" +
            @" ) )" +
            @" (?(ERROR)| " + OptionalTaggedSpace +
            @" ( (?<DTDNOTATIONBRACKETCLOSE> > ) | (?<ERROR>) )" +
            @" )" +
            @")";

        /* Document Type Definition */
        //[28a] DeclSep ::= PEReference | S
        /// <summary>A declaration separator: parameter-entity reference or white space.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string DeclSep = @"( " + PEReference + @" | " + TaggedSpace + @" )";

        //[29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
        /// <summary>Any markup declaration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string MarkupDecl =
            @"(" +
            ElementDecl +
            @" | " +
            AttlistDecl +
            @" | " +
            EntityDecl +
            @" | " +
            NotationDecl +
            @" | " +
            PI +
            @" | " +
            Comment +
            @")";

        //[28b]intSubset ::= (markupdecl | DeclSep)*
        /// <summary>The internal DTD subset.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string IntSubset =
            @"(?(ERROR)| (" +
            MarkupDecl +
            @" | " +
            DeclSep +
            @") )*";

        //[28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
        /// <summary>A document type declaration.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string DocTypeDecl =
            @"(" +
            @" (?<DTDDOCTYPEBRACKETOPEN> <! )" +
            @" (?<DTDNAME> DOCTYPE )" + TaggedSpace +
            @" ( " + TaggedName("DTDDOCTYPENAME") + @" | (?<ERROR>) )" +
            @" (?(ERROR)| ( " +
            @" " + TaggedSpace +
            @" " + ExternalID +
            @" ) )?" +
            @" (?(ERROR)| " + OptionalTaggedSpace +
            @" (" +
            @" (?<DTDINTSUBSETBRACKETOPEN> \[ )" +
            @" " + IntSubset +
            @" (?(ERROR)| ( (?<DTDINTSUBSETBRACKETCLOSE> \] ) | (?<ERROR>) ) )" +
            @" (?(ERROR)| " + OptionalTaggedSpace + @" )" +
            @" )?" +
            @" )" +
            @" (?(ERROR)| " + @" ( (?<DTDDOCTYPEBRACKETCLOSE> > ) | (?<ERROR>) ) )" +
            @")";

        /* Prolog (part 3) */
        //[22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
        /// <summary>The document prolog.</summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string Prolog =
            @"(" + XmlDecl + @"?" +
            @" (?(ERROR)| " + Misc + @" )*" +
            @" (?(ERROR)| " + DocTypeDecl +
            @" (?(ERROR)| " + Misc + @" )*" +
            @" )?" +
            @")";

        /* Document */
        //[1] document ::= prolog element Misc* (XML 1.0)
        //[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* ) (XML 1.1)
        /// <summary>
        /// A whole document, anchored at the start of the input and wrapped in an atomic group.
        /// If the end of the input is not reached and ERROR has not been set yet, ERROR is set.
        /// </summary>
        /// <remarks>Can set the ERROR group.</remarks>
        public static readonly string Document =
            @"(?>" +
            @" \A" +
            @" (" +
            @" " + Prolog +
            @" (?(ERROR)| " +
            @" (" + Element + @" | (?<ERROR>) )" +
            @" )" +
            @" (?(ERROR)| " + Misc + @"* )" +
            @" )" +
            @" ( \z | (?(ERROR)| (?<ERROR>) ) )" +
            @")";

        /* External Subset */
        //[31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
        //public static readonly string ExtSubsetDecl = MarkupDecl + ConditionalSect + DeclSep + "";
        //[30] extSubset ::= TextDecl? extSubsetDecl
        //public static readonly string ExtSubset = TextDecl + ExtSubsetDecl + "";

        /* Conditional Section */
        //[62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
        //public static readonly string IncludeSect = ExtSubsetDecl + "";
        //[65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
        //public static readonly string Ignore;
        //[64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
        //public static readonly string IgnoreSectContents = Ignore + "";
        //[63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
        //public static readonly string IgnoreSect = IgnoreSectContents + "";
        //[61] conditionalSect ::= includeSect | ignoreSect
        //public static readonly string ConditionalSect = IncludeSect + IgnoreSect + "";

        /* Well-Formed External Parsed Entity */
        //[78] extParsedEnt ::= ( TextDecl? content ) - ( Char* RestrictedChar Char* )
        //public static readonly string ExtParsedEnt = TextDecl + Content + "";

        /// <summary>Wraps <see cref="UntaggedName"/> in a named capturing group.</summary>
        /// <param name="name">The group name to capture the name under.</param>
        /// <returns>The pattern <c>(?&lt;name&gt;...)</c> around the name pattern.</returns>
        /// <exception cref="System.ArgumentException"><paramref name="name"/> is null or empty.</exception>
        public static string TaggedName(string name)
        {
            RequireGroupName(name);
            return @"(?<" + name + @">" + UntaggedName + @" )";
        }

        /// <summary>Wraps <see cref="UntaggedNmToken"/> in a named capturing group.</summary>
        /// <param name="name">The group name to capture the name token under.</param>
        /// <returns>The pattern <c>(?&lt;name&gt;...)</c> around the name-token pattern.</returns>
        /// <exception cref="System.ArgumentException"><paramref name="name"/> is null or empty.</exception>
        public static string TaggedNmToken(string name)
        {
            RequireGroupName(name);
            return @"(?<" + name + @">" + UntaggedNmToken + @" )";
        }

        // Rejects a missing group name up front; it would otherwise yield the invalid
        // construct "(?<>...)" that only fails later, when the pattern is compiled.
        private static void RequireGroupName(string name)
        {
            if (string.IsNullOrEmpty(name))
            {
                throw new System.ArgumentException("A group name is required.", "name");
            }
        }
    }
}
namespace XmlParser
{
    using System.Text.RegularExpressions;

    /// <summary>
    /// Pairs a regular-expression capture with the name of the group it belongs to.
    /// </summary>
    public class NamedCapture
    {
        /// <summary>Name of the group that produced <see cref="Capture"/>.</summary>
        public readonly string Name;

        /// <summary>The capture itself.</summary>
        public readonly Capture Capture;

        /// <summary>Stores both values as given; nothing is validated.</summary>
        /// <param name="name">Group name.</param>
        /// <param name="capture">Capture made by that group.</param>
        public NamedCapture(string name, Capture capture)
        {
            this.Name = name;
            this.Capture = capture;
        }

        /// <summary>Formats the pair as the name, a colon and a space, then the capture's text.</summary>
        public override string ToString()
        {
            // A null name or capture is rendered as an empty string, as with concatenation.
            return string.Format("{0}: {1}", this.Name, this.Capture);
        }
    }
}
namespace XmlParser
{
    using System.Text.RegularExpressions;

    /// <summary>
    /// Pairs a regular-expression group with the name it was looked up by.
    /// </summary>
    public class NamedGroup
    {
        /// <summary>Name of <see cref="Group"/>.</summary>
        public readonly string Name;

        /// <summary>The group itself.</summary>
        public readonly Group Group;

        /// <summary>Stores both values as given; nothing is validated.</summary>
        /// <param name="name">Group name.</param>
        /// <param name="group">The group taken from a match.</param>
        public NamedGroup(string name, Group group)
        {
            this.Name = name;
            this.Group = group;
        }

        /// <summary>Formats the pair as the name, a colon and a space, then whether the group matched.</summary>
        public override string ToString()
        {
            bool matched = this.Group.Success;
            return string.Format("{0}: {1}", this.Name, matched);
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement