Guest User

Untitled

a guest
Jul 18th, 2018
140
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 27.04 KB | None | 0 0
  1. // ---------------------------------------------------------------------------------------------------------------------
  2. // This file is part of CodeBox, an open-source toolkit for D.
  3. //
  4. // Copyright (c) 2009, Sean Kerr.
  5. // All rights reserved.
  6. //
  7. // Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
  8. // following conditions are met:
  9. //
  10. // * Redistributions of source code must retain the above copyright notice, this list of conditions and the following
  11. // disclaimer.
  12. // * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
  13. // disclaimer in the documentation and/or other materials provided with the distribution.
  14. // * Neither the name CodeBox nor the names of its contributors may be used to endorse or promote products derived from
  15. // this software without specific prior written permission.
  16. //
  17. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  18. // INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  20. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  22. // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANYWAY OUT OF THE USE
  23. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. //
  25. // Author: Sean Kerr <sean@code-box.org>
  26. // Version: $Id$
  27. // ---------------------------------------------------------------------------------------------------------------------
  28.  
  29. module codebox.text.Regex;
  30.  
  31. // +-------------------------------------------------------------------------------------------------------------------+
  32. // | IMPORTS |
  33. // +-------------------------------------------------------------------------------------------------------------------+
  34.  
  35. private import codebox.capi.PCRE;
  36. private import codebox.core.Exception;
  37. private import tango.stdc.stringz;
  38.  
  39. debug (UnitTest) {
  40.  
  41. private import tango.io.Stdout;
  42.  
  43. }
  44.  
  45. // +-------------------------------------------------------------------------------------------------------------------+
  46. // | ENUMS |
  47. // +-------------------------------------------------------------------------------------------------------------------+
  48.  
  /**
   * Pattern compilation flags understood by Regex.compile().
   *
   * Every member is an alias for a PCRE compile option, so several members may be combined with a bitwise OR
   * (for example Modifier.I | Modifier.M) and passed as the /modifiers/ argument.
   */
  public enum Modifier : int {

      /** Match letters without regard to case (PCRE_CASELESS). */
      I = PCRE_CASELESS,

      /** Multi-line mode (PCRE_MULTILINE). */
      M = PCRE_MULTILINE,

      /** Let a period (.) match newline sequences as well (PCRE_DOTALL). */
      S = PCRE_DOTALL,

      /** Treat the pattern and the subject as UTF-8 (PCRE_UTF8). */
      U = PCRE_UTF8

  }
  67.  
  68. // +-------------------------------------------------------------------------------------------------------------------+
  69. // | CLASSES |
  70. // +-------------------------------------------------------------------------------------------------------------------+
  71.  
  /**
   * Regex represents a single compiled regular expression pattern upon which all matching, searching, replacing and
   * splitting of textual data occurs.
   *
   * Instances are obtained through Regex.compile(); the constructor is private. The compiled PCRE data is owned by
   * the instance and released by its destructor.
   */
  public class Regex {

      /** The count of captured groups. */
      private int _groups;

      /** The modifiers used when compiling the regular expression pattern. */
      private int _modifiers;

      /** The group indexes, keyed by their associated group names. */
      private int[char[]] _names;

      /** The regular expression pattern. */
      private char[] _pattern;

      /** The internal pcre struct instance. */
      private pcre* _pcreIn;

      /** The internal pcre_extra struct instance (may be null when pcre_study() produced no extra data). */
      private pcre_extra* _pcreExtraIn;

      // +-----------------------------------------------------------------------------------------------------------+
      // | CONSTRUCTORS / DESTRUCTOR                                                                                 |
      // +-----------------------------------------------------------------------------------------------------------+

      /**
       * Create a new Regex instance.
       *
       * Params:
       * pcreIn = The internal pcre struct instance.
       * pcreExtraIn = The internal pcre_extra struct instance.
       * groups = The count of captured groups.
       * names = The group indexes and their associated group names.
       * pattern = The regular expression pattern.
       * modifiers = The modifiers used when compiling /pattern/.
       */
      private this (pcre* pcreIn, pcre_extra* pcreExtraIn, int groups, int[char[]] names, char[] pattern, int modifiers) {
          _groups = groups;
          _modifiers = modifiers;
          _names = names;
          _pattern = pattern;
          _pcreIn = pcreIn;
          _pcreExtraIn = pcreExtraIn;
      }

      /**
       * Destroy this Regex instance and release the PCRE data it owns.
       */
      public ~this () {
          if (_pcreExtraIn) {
              (*pcre_free)(_pcreExtraIn);
              _pcreExtraIn = null;
          }

          if (_pcreIn) {
              (*pcre_free)(_pcreIn);
              _pcreIn = null;
          }
      }

      // +-----------------------------------------------------------------------------------------------------------+
      // | OPERATORS                                                                                                 |
      // +-----------------------------------------------------------------------------------------------------------+

      /**
       * Handle an in statement that checks for existence of /group/ within the named groups.
       *
       * Params:
       * group = The group name.
       *
       * Returns: true if the pattern declares a named group called /group/, otherwise false.
       */
      public bool opIn_r (char[] group) {
          return (group in _names) !is null;
      }

      // +-----------------------------------------------------------------------------------------------------------+
      // | METHODS                                                                                                   |
      // +-----------------------------------------------------------------------------------------------------------+

      /**
       * Retrieve the text of one captured group out of a match vector.
       *
       * A group that did not take part in the match has negative offsets in the vector; slicing /subject/ with
       * them would raise a bounds error, so such a group yields null instead.
       *
       * Params:
       * subject = The subject that was matched.
       * vector = The match vector filled in by the most recent match.
       * group = The group index (0 is the whole match).
       */
      private static char[] capture (char[] subject, int[] vector, int group) {
          int start = vector[group << 1];
          int end = vector[(group << 1) + 1];

          if (start < 0 || end < start) {
              return null;
          }

          return subject[start .. end];
      }

      /**
       * Compile a regular expression pattern.
       *
       * Params:
       * pattern = The regular expression pattern.
       * modifiers = The modifiers to use when compiling the regular expression pattern.
       *
       * Returns: A new Regex instance wrapping the compiled pattern.
       *
       * Throws: RegexException If the pattern cannot be compiled or studied.
       */
      public static Regex compile (char[] pattern, int modifiers = 0) {
          char* error;
          int errorOffset;
          int groups;
          int nameCount;
          int nameSize;
          char* nameTable;
          int[char[]] names;

          // pcre_compile() reports failure through a null return value; /error/ then points at the message
          pcre* pcreIn = pcre_compile(toStringz(pattern), modifiers, &error, &errorOffset, null);

          if (pcreIn is null) {
              throw new RegexException("Failed to compile pattern at offset {}: {}", errorOffset, fromStringz(error));
          }

          // a null result from pcre_study() is not an error, so only /error/ can be used to detect failure here
          error = null;

          pcre_extra* pcreExtraIn = pcre_study(pcreIn, 0, &error);

          if (error) {
              (*pcre_free)(pcreIn);

              throw new RegexException("Failed to get pattern optimizations: {}", fromStringz(error));
          }

          // get pattern details
          pcre_fullinfo(pcreIn, pcreExtraIn, PCRE_INFO_CAPTURECOUNT, &groups);
          pcre_fullinfo(pcreIn, pcreExtraIn, PCRE_INFO_NAMECOUNT, &nameCount);
          pcre_fullinfo(pcreIn, pcreExtraIn, PCRE_INFO_NAMEENTRYSIZE, &nameSize);
          pcre_fullinfo(pcreIn, pcreExtraIn, PCRE_INFO_NAMETABLE, &nameTable);

          // each name table entry is a two byte big-endian group index followed by the zero terminated group name
          for (; nameCount > 0; nameCount--, nameTable += nameSize) {
              // the table lives inside memory owned by PCRE, so the name is copied before it is used as a key
              names[fromStringz(nameTable + 2).dup] = (nameTable[0] << 8) | nameTable[1];
          }

          return new Regex(pcreIn, pcreExtraIn, groups, names, pattern, modifiers);
      }

      /**
       * Retrieve the count of groups that will be captured when executing match(), search(), replace(), or split().
       */
      public int groups () {
          return _groups;
      }

      /**
       * Execute a match at the beginning of /subject/ and retrieve the first match.
       *
       * Params:
       * subject = The subject upon which the match will take place.
       *
       * Returns: A Match instance on which find() has been called once; use its matches() method to determine
       * whether the pattern matched.
       */
      public Match match (char[] subject) {
          Match anchored = new Match(this, subject, PCRE_ANCHORED);

          anchored.find();

          return anchored;
      }

      /**
       * Retrieve the modifiers used within this Regex instance.
       */
      public int modifiers () {
          return _modifiers;
      }

      /**
       * Retrieve all named groups.
       */
      public char[][] names () {
          return _names.keys;
      }

      /**
       * Execute a replacement on /subject/ where all pattern matches are replaced with /value/. If /max/ is
       * unspecified or 0, all matches will be replaced.
       *
       * Params:
       * subject = The subject upon which the replacement will take place.
       * value = The value that will replace each pattern match.
       * max = The maximum number of times the replacement will take place.
       *
       * Returns: A copy of /subject/ with replaced values.
       */
      public char[] replace (char[] subject, char[] value, int max = 0) {
          int iteration;
          int position;
          char[] result;

          foreach (m; search(subject)) {
              iteration++;

              // keep the text between the previous match and this one, then substitute the match itself
              result ~= subject[position .. m._vector[0]];
              result ~= value;
              position = m._vector[1];

              if (max && iteration >= max) {
                  break;
              }
          }

          result ~= subject[position .. $];

          return result;
      }

      /**
       * Execute a replacement on /subject/ where each match is replaced by the first groups() entries of /values/,
       * appended in order. If /max/ is unspecified or 0, all matches will be replaced.
       *
       * NOTE: The whole match is replaced; matched text outside of the captured groups is not preserved.
       *
       * Params:
       * subject = The subject upon which the replacement will take place.
       * values = The array of values that will replace each captured group. This must contain the same amount of
       * values as there are captured groups.
       * max = The maximum number of times the replacement will take place.
       *
       * Returns: A copy of /subject/ with replaced values.
       *
       * Throws: ArrayBoundsException If there are more captured groups than there are indexes inside /values/.
       */
      public char[] replace (char[] subject, char[][] values, int max = 0) {
          int iteration;
          int position;
          char[] result;

          foreach (m; search(subject)) {
              iteration++;

              result ~= subject[position .. m._vector[0]];
              position = m._vector[1];

              for (int group = 0; group < _groups; group++) {
                  result ~= values[group];
              }

              if (max && iteration >= max) {
                  break;
              }
          }

          result ~= subject[position .. $];

          return result;
      }

      /**
       * Execute a replacement on /subject/ where each match iteration, group index and group value are passed to
       * /dg/ for replacement. If /max/ is unspecified or 0, all matches will be replaced.
       *
       * NOTE: This calls /dg/ for each captured group, not uncaptured match data. A group that did not take part in
       * the match is passed to /dg/ as a null value.
       *
       * Params:
       * subject = The subject upon which the replacement will take place.
       * dg = The delegate that is called for each replacement. It takes three arguments:
       * 1. iteration = The match iteration (starting at 1).
       * 2. group = The group index (starting at 1).
       * 3. value = The value.
       * max = The maximum number of times the replacement will take place.
       *
       * Returns: A copy of /subject/ with replaced values.
       */
      public char[] replace (char[] subject, char[] delegate (int iteration, int group, char[] value) dg, int max = 0) {
          int iteration;
          int position;
          char[] result;

          foreach (m; search(subject)) {
              iteration++;

              result ~= subject[position .. m._vector[0]];
              position = m._vector[1];

              for (int group = 1; group <= _groups; group++) {
                  result ~= dg(iteration, group, capture(subject, m._vector, group));
              }

              if (max && iteration >= max) {
                  break;
              }
          }

          result ~= subject[position .. $];

          return result;
      }

      /**
       * Retrieve the regular expression pattern used within this Regex instance.
       */
      public char[] pattern () {
          return _pattern;
      }

      /**
       * Execute a search within /subject/ and retrieve the Match instance associated. Because /subject/ is searched,
       * you must iterate over the Match instance, or use its find() method to find each match.
       *
       * Params:
       * subject = The subject upon which the search will take place.
       *
       * Returns: This always returns a Match instance.
       */
      public Match search (char[] subject) {
          return new Match(this, subject);
      }

      /**
       * Execute a split on /subject/. If /max/ is unspecified or 0, all matches will be split.
       *
       * The result holds the text before each match followed by the value of every captured group of that match,
       * and finally the text after the last match. A group that did not take part in a match contributes a null
       * entry.
       *
       * Params:
       * subject = The subject upon which the split will take place.
       * max = The maximum number of times the split will take place.
       *
       * Returns: The array of split /subject/ data.
       */
      public char[][] split (char[] subject, int max = 0) {
          int iteration;
          int position;
          char[][] result;

          foreach (m; search(subject)) {
              iteration++;

              result ~= subject[position .. m._vector[0]];
              position = m._vector[1];

              for (int group = 1; group <= _groups; group++) {
                  result ~= capture(subject, m._vector, group);
              }

              if (max && iteration >= max) {
                  break;
              }
          }

          result ~= subject[position .. $];

          return result;
      }

      /**
       * Execute a split on /subject/. If /max/ is unspecified or 0, all matches will be split.
       *
       * NOTE: If the /group/ value passed to /dg/ is 0, it means the value is not part of the match pattern. The
       * text after the last match is reported with the next iteration number and a /group/ of 0.
       *
       * Params:
       * subject = The subject upon which the split will take place.
       * dg = The delegate that is called for each split. It takes three arguments:
       * 1. iteration = The match iteration (starting at 1).
       * 2. group = The group index.
       * 3. value = The value.
       * max = The maximum number of times the split will take place.
       */
      public void split (char[] subject, void delegate (int iteration, int group, char[] value) dg, int max = 0) {
          int iteration;
          int position;

          foreach (m; search(subject)) {
              iteration++;

              dg(iteration, 0, subject[position .. m._vector[0]]);

              position = m._vector[1];

              for (int group = 1; group <= _groups; group++) {
                  dg(iteration, group, capture(subject, m._vector, group));
              }

              if (max && iteration >= max) {
                  break;
              }
          }

          dg(++iteration, 0, subject[position .. $]);
      }

  }
  507.  
  /**
   * Match represents a single match. When this object is iterated or find() is called, it will then represent the
   * next available match.
   */
  public class Match {

      /** Indicates that the most recent iteration or find() call completed successfully and a new match is available. */
      private bool _matches;

      /** The modifiers used when matching the subject. */
      private int _modifiers = PCRE_NEWLINE_ANYCRLF;

      /**
       * The subject offset at which the next find() call starts, or -1 once the subject is exhausted. This is kept
       * apart from the match vector so that an empty match can be stepped over without altering the reported match
       * positions.
       */
      private int _offset;

      /** The parent Regex instance under which this Match instance is operating. */
      private Regex _regex;

      /** The subject upon which all matching will take place. */
      private char[] _subject;

      /** The internal vector of match positions. */
      private int[] _vector;

      // +-----------------------------------------------------------------------------------------------------------+
      // | CONSTRUCTORS / DESTRUCTOR                                                                                 |
      // +-----------------------------------------------------------------------------------------------------------+

      /**
       * Create a new Match instance.
       *
       * Params:
       * regex = The parent Regex instance under which this Match instance will operate.
       * subject = The subject upon which all matching will take place.
       * modifiers = The modifiers used when matching /subject/.
       */
      private this (Regex regex, char[] subject, int modifiers = 0) {
          _modifiers |= modifiers;
          _regex = regex;
          _subject = subject;

          // three slots per group (the whole match counts as a group); the last third is handed to pcre_exec() too
          _vector.length = (_regex.groups + 1) * 3;
      }

      // +-----------------------------------------------------------------------------------------------------------+
      // | OPERATORS                                                                                                 |
      // +-----------------------------------------------------------------------------------------------------------+

      /**
       * Handle a foreach statement that expects /iteration/ and /match/.
       *
       * NOTE: This calls reset() prior to matching.
       *
       * Params:
       * dg = The delegate that is called for each match. /iteration/ starts at 1.
       */
      public int opApply (int delegate (ref int iteration, ref Match match) dg) {
          int iteration;
          int result;
          Match self = this;

          reset();

          while (find()) {
              iteration++;

              result = dg(iteration, self);

              if (result) {
                  break;
              }
          }

          return result;
      }

      /**
       * Handle a foreach statement that expects /match/.
       *
       * NOTE: This calls reset() prior to matching.
       *
       * Params:
       * dg = The delegate that is called for each match.
       */
      public int opApply (int delegate (ref Match match) dg) {
          int result;
          Match self = this;

          reset();

          while (find()) {
              result = dg(self);

              if (result) {
                  break;
              }
          }

          return result;
      }

      /**
       * Handle an index expression that retrieves the captured group value that is associated with /group/.
       *
       * Params:
       * group = The captured group name.
       *
       * Returns: The captured text, or null if the group did not take part in the match.
       *
       * Throws: ArrayBoundsException If the group name is nonexistent.
       */
      public char[] opIndex (char[] group) {
          return opIndex(_regex._names[group]);
      }

      /**
       * Handle an index expression that retrieves the captured group value that is associated with /group/.
       *
       * Params:
       * group = The captured group index (0 is the whole match).
       *
       * Returns: The captured text, or null if the group did not take part in the match.
       *
       * Throws: ArrayBoundsException If the group index is nonexistent.
       */
      public char[] opIndex (int group) {
          // only the first two thirds of the vector hold group offsets; indexing through this slice makes an
          // out of range group fail the bounds check instead of reading the trailing workspace slots
          int[] pairs = _vector[0 .. (_regex.groups + 1) << 1];

          int start = pairs[group << 1];
          int end = pairs[(group << 1) + 1];

          // a group that did not take part in the match has negative offsets
          if (start < 0 || end < start) {
              return null;
          }

          return _subject[start .. end];
      }

      // +-----------------------------------------------------------------------------------------------------------+
      // | METHODS                                                                                                   |
      // +-----------------------------------------------------------------------------------------------------------+

      /**
       * Execute the next pattern match and reflect the match information in this Match instance.
       *
       * Once a call finds no further match, every following call returns false until reset() is called.
       *
       * NOTE: matches() must be called to determine if a match was found before you access any match data.
       *
       * Returns: If a match was found, this will return true, otherwise false.
       *
       * Throws: RegexException If pcre_exec() reports an error other than "no match".
       */
      public bool find () {
          _matches = false;

          if (_offset < 0 || cast(size_t) _offset > _subject.length) {
              return false;
          }

          int exec = pcre_exec(_regex._pcreIn, _regex._pcreExtraIn, _subject.ptr, _subject.length, _offset,
                               _modifiers, _vector.ptr, _vector.length);

          if (exec < PCRE_ERROR_NOMATCH) {
              throw new RegexException("Failed to retrieve next pattern match");
          }

          if (exec == PCRE_ERROR_NOMATCH) {
              _offset = -1;

              return false;
          }

          _matches = true;
          _offset = _vector[1];

          if (_vector[0] == _vector[1]) {
              // an empty match would be found again at the same offset forever, so step past it
              _offset++;

              if (_regex.modifiers() & PCRE_UTF8) {
                  // in UTF-8 mode the next start offset must not land on a continuation byte
                  while (cast(size_t) _offset < _subject.length && (_subject[_offset] & 0xC0) == 0x80) {
                      _offset++;
                  }
              }
          }

          return true;
      }

      /**
       * Retrieve the portion of the subject that matched the pattern. This is identical to accessing group index 0.
       */
      public char[] match () {
          return _subject[_vector[0] .. _vector[1]];
      }

      /**
       * Indicates whether or not the most recent find() call matched a new portion of the subject.
       */
      public bool matches () {
          return _matches;
      }

      /**
       * Retrieve the position of the subject at which the most recent iteration or find() call matched.
       */
      public int position () {
          return _vector[0];
      }

      /**
       * Retrieve the parent Regex instance under which this Match instance is operating.
       */
      public Regex regex () {
          return _regex;
      }

      /**
       * Reset all iteration and find() results so the next iteration or find() call will start from the beginning of
       * the subject.
       */
      public void reset () {
          _matches = false;
          _offset = 0;
          _vector[0] = 0;
          _vector[1] = 0;
      }

      /**
       * Retrieve the subject upon which this Match instance is operating.
       */
      public char[] subject () {
          return _subject;
      }

  }
  740.  
  741. // +-------------------------------------------------------------------------------------------------------------------+
  742. // | UNIT TESTS |
  743. // +-------------------------------------------------------------------------------------------------------------------+
  744.  
  debug (UnitTest) {

      // Exercises named groups, iteration over matches, the three replace() overloads and both split() overloads.
      unittest {

          Regex regex = Regex.compile(r"^(?P<start>[a-z]{2}\d) (?P<data>[^\s]+) end$", Modifier.I | Modifier.M);
          char[] text = "aB5 regexfun end\nYz9 juststuff end\nmn1 fancycode end";

          // named groups and group count
          assert("start" in regex);
          assert("data" in regex);
          assert(regex.groups() == 2);

          // each line of the subject is one match; check the whole match and both groups by index and by name
          foreach (i, m; regex.search(text)) {
              char[] whole;
              char[] start;
              char[] data;

              switch (i) {
                  case 1:
                      whole = "aB5 regexfun end";
                      start = "aB5";
                      data = "regexfun";
                      break;

                  case 2:
                      whole = "Yz9 juststuff end";
                      start = "Yz9";
                      data = "juststuff";
                      break;

                  case 3:
                      whole = "mn1 fancycode end";
                      start = "mn1";
                      data = "fancycode";
                      break;

                  default:
                      // nothing is asserted for any further iteration
                      continue;
              }

              assert(m[0] == m.match());
              assert(m[0] == whole);
              assert(m[1] == start);
              assert(m[2] == data);
              assert(m["start"] == start);
              assert(m["data"] == data);
          }

          // replace with a single value, and with one value per group, with and without a limit
          assert(regex.replace(text, "xxx") == "xxx\nxxx\nxxx");
          assert(regex.replace(text, "xxx", 2) == "xxx\nxxx\nmn1 fancycode end");
          assert(regex.replace(text, ["123", "456"]) == "123456\n123456\n123456");
          assert(regex.replace(text, ["123", "456"], 1) == "123456\nYz9 juststuff end\nmn1 fancycode end");

          // replace through a delegate that returns every group unchanged
          regex = Regex.compile(r"start (\d+)(\w+) end", Modifier.I);
          text = "start 42abc end start 38176qwerty end start 1337bugs end";

          assert(regex.replace(text,

              (int iteration, int group, char[] value) {
                  char[] expected;

                  switch (iteration) {
                      case 1:
                          if (group == 1) {
                              expected = "42";
                          } else {
                              expected = "abc";
                          }
                          break;

                      case 2:
                          if (group == 1) {
                              expected = "38176";
                          } else {
                              expected = "qwerty";
                          }
                          break;

                      default:
                          if (group == 1) {
                              expected = "1337";
                          } else {
                              expected = "bugs";
                          }
                          break;
                  }

                  assert(value == expected);

                  return value;
              }

          ) == "42abc 38176qwerty 1337bugs");

          // split into an array, with and without a limit
          regex = Regex.compile(r"\W+", Modifier.I);
          text = "Hello, World, In, D";

          assert(regex.split(text) == ["Hello", "World", "In", "D"]);
          assert(regex.split(text, 2) == ["Hello", "World", "In, D"]);

          // split through a delegate; group 0 carries the text that lies outside of the matches
          regex = Regex.compile(r"([a-z]+) (\d+)", Modifier.I);
          text = "#?#?Hello 123!!##D 456&&##";

          regex.split(text,

              (int iteration, int group, char[] value) {
                  switch (iteration) {
                      case 1:
                          switch (group) {
                              case 0: assert(value == "#?#?"); break;
                              case 1: assert(value == "Hello"); break;
                              case 2: assert(value == "123"); break;
                              default: break;
                          }
                          break;

                      case 2:
                          switch (group) {
                              case 0: assert(value == "!!##"); break;
                              case 1: assert(value == "D"); break;
                              case 2: assert(value == "456"); break;
                              default: break;
                          }
                          break;

                      case 3:
                          assert(value == "&&##");
                          break;

                      default:
                          break;
                  }
              }

          );

      }

  }
Add Comment
Please, Sign In to add comment