Advertisement
Guest User

Untitled

a guest
Aug 25th, 2016
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.91 KB | None | 0 0
  1. element_symbols_pattern =
  2. r"C[laroudsemf]?|Os?|N[eaibdpos]?|S[icernbmg]?|P[drmtboau]?|"
  3. r"H[eofgas]?|c|n|o|s|p|A[lrsgutcm]|B[eraik]?|Dy|E[urs]|F[erm]?|"
  4. r"G[aed]|I[nr]?|Kr?|L[iaur]|M[gnodt]|R[buhenaf]|T[icebmalh]|"
  5. r"U|V|W|Xe|Yb?|Z[nr]|*"
  6.  
  7. atom_fields = [
  8. "raw_atom",
  9. "open_bracket",
  10. "weight",
  11. "element",
  12. "chiral_count",
  13. "chiral_named",
  14. "chiral_symbols",
  15. "hcount",
  16. "positive_count",
  17. "positive_symbols",
  18. "negative_count",
  19. "negative_symbols",
  20. "error_1",
  21. "error_2",
  22. "close_bracket",
  23. "error_3",
  24. ]
  25.  
  26. atom = re.compile(r"""
  27. (?P<raw_atom>Cl|Br|[cnospBCNOFPSI]) | # "raw" means outside of brackets
  28. (
  29. (?P<open_bracket>[) # Start bracket
  30. (?P<weight>d+)? # Atomic weight (optional)
  31. ( # valid term or error
  32. ( # valid term
  33. (?P<element>""" + element_symbols_pattern + r""") # element or aromatic
  34. ( # Chirality can be
  35. (?P<chiral_count>@d+) | # @1 @2 @3 ...
  36. (?P<chiral_named> # or
  37. @TH[12] | # @TA1 @TA2
  38. @AL[12] | # @AL1 @AL2
  39. @SP[123] | # @SP1 @SP2 @SP3
  40. @TB(1[0-9]?|20?|[3-9]) | # @TB{1-20}
  41. @OH(1[0-9]?|2[0-9]?|30?|[4-9])) | # @OH{1-30}
  42. (?P<chiral_symbols>@+) # or @@@@@@@...
  43. )? # and chirality is optional
  44. (?P<hcount>Hd*)? # Optional hydrogen count
  45. ( # Charges can be
  46. (?P<positive_count>+d+) | # +<number>
  47. (?P<positive_symbols>++) | # +++... This includes the single '+'
  48. (?P<negative_count>-d+) | # -<number>
  49. (?P<negative_symbols>-+) # ---... including a single '-'
  50. )? # and are optional
  51. (?P<error_1>[^]]+)? # If there's anything left, it's an error
  52. ) | ( # End of parsing stuff in []s, except
  53. (?P<error_2>[^]]*) # If there was an error, we get here
  54. ))
  55. ((?P<close_bracket>])| # End bracket
  56. (?P<error_3>$)) # unexpectedly reached end of string
  57. )
  58. """, re.X)
  59.  
  60. extern crate regex;
  61. use regex::Regex;
  62.  
  63. fn main() {
  64. let atom_fields: Vec<&'static str> = vec![
  65. "raw_atom",
  66. "open_bracket",
  67. "weight",
  68. "element",
  69. "chiral_count",
  70. "chiral_named",
  71. "chiral_symbols",
  72. "hcount",
  73. "positive_count",
  74. "positive_symbols",
  75. "negative_count",
  76. "negative_symbols",
  77. "error_1",
  78. "error_2",
  79. "close_bracket",
  80. "error_3"
  81. ];
  82.  
  83. const EL_SYMBOLS: &'static str = r#"(?P<element>S?|*")"#;
  84. let atom_re_str: &String = &String::from(vec![
  85. // r"(?P<raw_atom>Cl|Br|[cnospBCNOFPSI])|", // "raw" means outside of brackets
  86. r"(",
  87. r"(?P<open_bracket>[)", // Start bracket
  88. // r"(?P<weight>d+)?", // Atomic weight (optional)
  89. r"(", // valid term or error
  90. r"(", // valid term
  91. &EL_SYMBOLS, // element or aromatic
  92. // r"(", // Chirality can be
  93. // r"(?P<chiral_count>@d+)|", // @1 @2 @3 ...
  94. // r"(?P<chiral_named>", // or
  95. // r"@TH[12]|", // @TA1 @TA2
  96. // r"@AL[12]|", // @AL1 @AL2
  97. // r"@SP[123]|", // @SP1 @SP2 @SP3
  98. // r"@TB(1[0-9]?|20?|[3-9])|", // @TB{1-20}
  99. // r"@OH(1[0-9]?|2[0-9]?|30?|[4-9]))|", // @OH{1-30}
  100. // r"(?P<chiral_symbols>@+)", // or @@@@....,
  101. // r")?", // and chirality is optional
  102. // r"(?P<hcount>Hd*)?", // Optional hydrogen count
  103. // r"(", // Charges can be
  104. // r"(?P<positive_count>+d+)|", // +<number>
  105. // r"(?P<positive_symbols>++)|", // +++...including a single '+'
  106. // r"(?P<negative_count>-d+)|", // -<number>
  107. // r"(?P<negative_symbols>-+)", // ---... including a single '-'
  108. // r")?", // and are optional
  109. // r"(?P<error_1>[^]]+)?", // anything left is an error
  110. r")", // End of stuff in []s, except
  111. r"|((?P<error_2>[^]]*)", // If other error, we get here
  112. r"))",
  113. r"((?P<close_bracket>])|", // End bracket
  114. r"(?P<error_3>$)))"].join("")); // unexpected end of string
  115.  
  116. println!("generated regex: {}", &atom_re_str);
  117. let atom_re = Regex::new(&atom_re_str).unwrap();
  118.  
  119. for cur_char in "[S]".chars() {
  120. let cur_string = cur_char.to_string();
  121. println!("cur string: {}", &cur_string);
  122. let captures = atom_re.captures(&cur_string.as_str()).unwrap();
  123. // if captures.name("atom").is_some() {
  124. // for cur_field in &atom_fields {
  125. // let field_capture = captures.name(cur_field);
  126. // if cur_field.contains("error") {
  127. // if *cur_field == "error_3" {
  128. // // TODO replace me with a real error
  129. // println!("current char: {:?}", &cur_char);
  130. // panic!("Missing a close bracket (]). Looks like: {}.",
  131. // field_capture.unwrap());
  132. // } else {
  133. // panic!("I don't recognize the character. Looks like: {}.",
  134. // field_capture.unwrap());
  135. // }
  136. // } else {
  137. // println!("ok! matched {:?}", &cur_char);
  138. // }
  139. // }
  140. // }
  141. }
  142. }
  143.  
  144. ((?P<open_bracket>\[)(((?P<element>S?|\*"))|((?P<error_2>[^\]]*)))((?P<close_bracket>\])|(?P<error_3>$)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement