Advertisement
Guest User

erroneous std.string

a guest
Mar 10th, 2014
175
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
D 126.16 KB | None | 0 0
  1. // Written in the D programming language.
  2.  
  3. /**
  4. String handling functions. Objects of types $(D _string), $(D
  5. wstring), and $(D dstring) are value types and cannot be mutated
  6. element-by-element. For using mutation during building strings, use
  7. $(D char[]), $(D wchar[]), or $(D dchar[]). The $(D *_string) types
  8. are preferable because they don't exhibit undesired aliasing, thus
  9. making code more robust.
  10.  
  11. Macros: WIKI = Phobos/StdString
  12.  
  13. Copyright: Copyright Digital Mars 2007-.
  14.  
  15. License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
  16.  
  17. Authors: $(WEB digitalmars.com, Walter Bright),
  18.          $(WEB erdani.org, Andrei Alexandrescu),
  19.          and Jonathan M Davis
  20.  
  21. Source:    $(PHOBOSSRC std/_string.d)
  22.  
  23. */
  24. module std.string;
  25.  
  26. debug=string;                 // uncomment to turn on debugging printf's
  27. debug(string) import core.stdc.stdio;
  28.  
  29. import core.exception : RangeError, onRangeError;
  30. import core.vararg, core.stdc.stdlib, core.stdc.string,
  31.     std.algorithm, std.ascii, std.conv, std.exception, std.format, std.functional,
  32.     std.range, std.traits,
  33.     std.typecons, std.typetuple, std.uni, std.utf;
  34.  
  35. //Remove when repeat is finally removed. They're only here as part of the
  36. //deprecation of these functions in std.string.
  37. public import std.algorithm : startsWith, endsWith, cmp, count;
  38. public import std.array : join, split;
  39.  
  40. version(Windows) extern (C)
  41. {
  42.     size_t wcslen(in wchar *);
  43.     int wcscmp(in wchar *, in wchar *);
  44. }
  45.  
  46. version(unittest) import std.algorithm : filter;
  47.  
  48. /* ************* Exceptions *************** */
  49.  
  50. /++
  51.     Exception thrown on errors in std.string functions.
  52.   +/
  53. class StringException : Exception
  54. {
  55.     /++
  56.         Params:
  57.             msg  = The message for the exception.
  58.             file = The file where the exception occurred.
  59.             line = The line number where the exception occurred.
  60.             next = The previous exception in the chain of exceptions, if any.
  61.       +/
  62.     this(string msg,
  63.          string file = __FILE__,
  64.          size_t line = __LINE__,
  65.          Throwable next = null) @safe pure nothrow
  66.     {
  67.         super(msg, file, line, next);
  68.     }
  69. }
  70.  
  71.  
  72. /++
  73.     Compares two ranges of characters lexicographically. The comparison is
  74.     case insensitive. Use $(XREF algorithm, cmp) for a case sensitive
  75.     comparison. For details see $(XREF uni, _icmp).
  76.  
  77.     $(BOOKTABLE,
  78.         $(TR $(TD $(D < 0))  $(TD $(D s1 < s2) ))
  79.         $(TR $(TD $(D = 0))  $(TD $(D s1 == s2)))
  80.         $(TR $(TD $(D > 0))  $(TD $(D s1 > s2)))
  81.      )
  82. +/
  83. alias icmp = std.uni.icmp;
  84.  
  85. unittest
  86. {
  87.     debug(string) printf("string.icmp.unittest\n");
  88.  
  89.     assertCTFEable!(
  90.     {
  91.     assert(icmp("Ü", "ü") == 0, "Über failure");
  92.     assert(icmp("abc", "abc") == 0);
  93.     assert(icmp("ABC", "abc") == 0);
  94.     assert(icmp("abc"w, "abc") == 0);
  95.     assert(icmp("ABC", "abc"w) == 0);
  96.     assert(icmp("abc"d, "abc") == 0);
  97.     assert(icmp("ABC", "abc"d) == 0);
  98.     assert(icmp(cast(char[])"abc", "abc") == 0);
  99.     assert(icmp("ABC", cast(char[])"abc") == 0);
  100.     assert(icmp(cast(wchar[])"abc"w, "abc") == 0);
  101.     assert(icmp("ABC", cast(wchar[])"abc"w) == 0);
  102.     assert(icmp(cast(dchar[])"abc"d, "abc") == 0);
  103.     assert(icmp("ABC", cast(dchar[])"abc"d) == 0);
  104.     assert(icmp(cast(string)null, cast(string)null) == 0);
  105.     assert(icmp("", "") == 0);
  106.     assert(icmp("abc", "abcd") < 0);
  107.     assert(icmp("abcd", "abc") > 0);
  108.     assert(icmp("abc", "abd") < 0);
  109.     assert(icmp("bbc", "abc") > 0);
  110.     assert(icmp("abc", "abc"w) == 0);
  111.     assert(icmp("ABC"w, "abc") == 0);
  112.     assert(icmp("", ""w) == 0);
  113.     assert(icmp("abc"w, "abcd") < 0);
  114.     assert(icmp("abcd", "abc"w) > 0);
  115.     assert(icmp("abc", "abd") < 0);
  116.     assert(icmp("bbc"w, "abc") > 0);
  117.     assert(icmp("aaa", "aaaa"d) < 0);
  118.     assert(icmp("aaaa"w, "aaa"d) > 0);
  119.     assert(icmp("aaa"d, "aaa"w) == 0);
  120.     assert(icmp("\u0430\u0411\u0543"d, "\u0430\u0411\u0543") == 0);
  121.     assert(icmp("\u0430\u0411\u0543"d, "\u0431\u0410\u0544") < 0);
  122.     assert(icmp("\u0431\u0411\u0544"d, "\u0431\u0410\u0543") > 0);
  123.     assert(icmp("\u0430\u0410\u0543"d, "\u0430\u0410\u0544") < 0);
  124.     assert(icmp("\u0430\u0411\u0543"d, "\u0430\u0411\u0543\u0237") < 0);
  125.     assert(icmp("\u0430\u0411\u0543\u0237"d, "\u0430\u0411\u0543") > 0);
  126.  
  127.     assert(icmp("aaa", filter!"true"("aaa")) == 0);
  128.     assert(icmp(filter!"true"("aaa"), "aaa") == 0);
  129.     assert(icmp(filter!"true"("aaa"), filter!"true"("aaa")) == 0);
  130.     assert(icmp(filter!"true"("\u0430\u0411\u0543"d), "\u0430\u0411\u0543") == 0);
  131.     assert(icmp(filter!"true"("\u0430\u0411\u0543"d), "\u0431\u0410\u0544"w) < 0);
  132.     assert(icmp("\u0431\u0411\u0544"d, filter!"true"("\u0431\u0410\u0543"w)) > 0);
  133.     assert(icmp("\u0430\u0410\u0543"d, filter!"true"("\u0430\u0410\u0544")) < 0);
  134.     assert(icmp(filter!"true"("\u0430\u0411\u0543"d), filter!"true"("\u0430\u0411\u0543\u0237")) < 0);
  135.     assert(icmp(filter!"true"("\u0430\u0411\u0543\u0237"d), filter!"true"("\u0430\u0411\u0543")) > 0);
  136.     });
  137. }
  138.  
  139.  
  140. /++
  141.     Returns a C-style zero-terminated string equivalent to $(D s). $(D s)
  142.     must not contain embedded $(D '\0')'s as any C function will treat the first
  143.     $(D '\0') that it sees as the end of the string. If $(D s.empty) is
  144.     $(D true), then a string containing only $(D '\0') is returned.
  145.  
  146.     $(RED Important Note:) When passing a $(D char*) to a C function, and the C
  147.     function keeps it around for any reason, make sure that you keep a reference
  148.     to it in your D code. Otherwise, it may go away during a garbage collection
  149.     cycle and cause a nasty bug when the C code tries to use it.
  150.   +/
  151. immutable(char)* toStringz(const(char)[] s) pure nothrow
  152. in
  153. {
  154.     // The assert below contradicts the unittests!
  155.     //assert(memchr(s.ptr, 0, s.length) == null,
  156.     //text(s.length, ": `", s, "'"));
  157. }
  158. out (result)
  159. {
  160.     if (result)
  161.     {
  162.         auto slen = s.length;
  163.         while (slen > 0 && s[slen-1] == 0) --slen;
  164.         assert(strlen(result) == slen);
  165.         assert(memcmp(result, s.ptr, slen) == 0);
  166.     }
  167. }
  168. body
  169. {
  170.     /+ Unfortunately, this isn't reliable.
  171.      We could make this work if string literals are put
  172.      in read-only memory and we test if s[] is pointing into
  173.      that.
  174.  
  175.      /* Peek past end of s[], if it's 0, no conversion necessary.
  176.      * Note that the compiler will put a 0 past the end of static
  177.      * strings, and the storage allocator will put a 0 past the end
  178.      * of newly allocated char[]'s.
  179.      */
  180.      char* p = &s[0] + s.length;
  181.      if (*p == 0)
  182.      return s;
  183.      +/
  184.  
  185.     // Need to make a copy
  186.     auto copy = new char[s.length + 1];
  187.     copy[0..s.length] = s[];
  188.     copy[s.length] = 0;
  189.  
  190.     return assumeUnique(copy).ptr;
  191. }
  192.  
  193. /++ Ditto +/
  194. immutable(char)* toStringz(string s) pure nothrow
  195. {
  196.     if (s.empty) return "".ptr;
  197.     /* Peek past end of s[], if it's 0, no conversion necessary.
  198.      * Note that the compiler will put a 0 past the end of static
  199.      * strings, and the storage allocator will put a 0 past the end
  200.      * of newly allocated char[]'s.
  201.      */
  202.     immutable p = s.ptr + s.length;
  203.     // Is p dereferenceable? A simple test: if the p points to an
  204.     // address multiple of 4, then conservatively assume the pointer
  205.     // might be pointing to a new block of memory, which might be
  206.     // unreadable. Otherwise, it's definitely pointing to valid
  207.     // memory.
  208.     if ((cast(size_t) p & 3) && *p == 0)
  209.         return s.ptr;
  210.     return toStringz(cast(const char[]) s);
  211. }
  212.  
  213. unittest
  214. {
  215.     debug(string) printf("string.toStringz.unittest\n");
  216.  
  217.     // TODO: CTFEable toStringz is really necessary?
  218.     //assertCTFEable!(
  219.     //{
  220.     auto p = toStringz("foo");
  221.     assert(strlen(p) == 3);
  222.     const(char)[] foo = "abbzxyzzy";
  223.     p = toStringz(foo[3..5]);
  224.     assert(strlen(p) == 2);
  225.  
  226.     string test = "";
  227.     p = toStringz(test);
  228.     assert(*p == 0);
  229.  
  230.     test = "\0";
  231.     p = toStringz(test);
  232.     assert(*p == 0);
  233.  
  234.     test = "foo\0";
  235.     p = toStringz(test);
  236.     assert(p[0] == 'f' && p[1] == 'o' && p[2] == 'o' && p[3] == 0);
  237.     //});
  238. }
  239. ///convert a null terminated string in a D slice.No heap allocation is made
  240. T[]fromStringz(T)(T*str,T term=T.init)pure nothrow
  241. {
  242.     return StringzRange(str,term).array;
  243. }
  244. ///An input range iterating over a null terminated string.Because it does *not*
  245. ///compute the total length of the string it can be faster than fromStringz when
  246. ///you need only a input and a Forward range and nothing more.
  247. struct StringzRange(T)
  248. {
  249. public:
  250.     ///the pointer to the begin of the string
  251.     T*ptr;
  252.     ///the terminating character
  253.     immutable T back;
  254.     ///the default constructor.T.init is the null character when T is char
  255.     this(T*pointer,T terminator=T.init)pure nothrow
  256.     {
  257.         ptr=pointer;
  258.         back=terminator;
  259.     }
  260.     ///the input range interface
  261.     @propriety bool empty()pure nothrow
  262.     {
  263.         return*ptr==back;
  264.     }
  265.     ///ditto
  266.     @propriety ref inout(T) front()inout pure nothrow
  267.     {
  268.         return*ptr;
  269.     }
  270.     ///ditto
  271.     void popFront()pure nothrow
  272.     {
  273.         ++ptr;
  274.     }
  275.     ///the Forward range interface
  276.     @propriety inout(StringzRange!T)save()inout pure nothrow
  277.     {
  278.         return this;
  279.     }
  280.     ///convert the null terminated string in a D slice on the fly.No heap allocation is made
  281.     @propriety T[]array()nothrow
  282.     {
  283.         T[]result;
  284.         result.ptr=ptr;
  285.         static if(is(T:const char))
  286.             result.length=std.c.string.strlen(ptr);
  287.         else
  288.             result.length=walkLength(this);
  289.         return result;
  290.     }/*TODO:really needed implicit conversion to array?
  291.     ///Rest of range interface
  292.     alias array this;*/
  293. }
  294. unittest{}//TODO
  295. /**
  296.    Flag indicating whether a search is case-sensitive.
  297. */
  298. enum CaseSensitive { no, yes }
  299.  
  300. /++
  301.     Returns the index of the first occurrence of $(D c) in $(D s). If $(D c)
  302.     is not found, then $(D -1) is returned.
  303.  
  304.     $(D cs) indicates whether the comparisons are case sensitive.
  305.   +/
  306. ptrdiff_t indexOf(S)(S s,ElementType!S c,CaseSensitive cs = CaseSensitive.yes) @safe pure
  307.     if (isSomeString!S||( isInputRange!S && isSomeChar!(ElementType!S)))
  308. {
  309.     alias Char=ElementType!S;
  310.     if (cs == CaseSensitive.yes)
  311.     {
  312.         static if (is(S==Char[]) && Char.sizeof == 1)
  313.         {
  314.             if (std.ascii.isASCII(c) && !__ctfe)
  315.             {                                               // Plain old ASCII
  316.                 auto trustedmemchr() @trusted { return cast(Char*)memchr(s.ptr, c, s.length); }
  317.                 auto p = trustedmemchr();
  318.                 if (p)
  319.                     return p - s.ptr;
  320.                 else
  321.                     return -1;
  322.             }
  323.         }
  324.  
  325.         // c is a universal character
  326.         foreach (ptrdiff_t i, dchar c2; s)
  327.         {
  328.             if (c == c2)
  329.                 return i;
  330.         }
  331.     }
  332.     else
  333.     {
  334.         if (std.ascii.isASCII(c))
  335.         {                                                   // Plain old ASCII
  336.             auto c1 = cast(char) std.ascii.toLower(c);
  337.  
  338.             foreach (ptrdiff_t i, c2; s)
  339.             {
  340.                 auto c3 = std.ascii.toLower(c2);
  341.                 if (c1 == c3)
  342.                     return i;
  343.             }
  344.         }
  345.         else
  346.         {                                                   // c is a universal character
  347.             auto c1 = std.uni.toLower(c);
  348.  
  349.             foreach (ptrdiff_t i, dchar c2; s)
  350.             {
  351.                 auto c3 = std.uni.toLower(c2);
  352.                 if (c1 == c3)
  353.                     return i;
  354.             }
  355.         }
  356.     }
  357.     return -1;
  358. }
  359.  
  360. unittest
  361. {
  362.     debug(string) printf("string.indexOf.unittest\n");
  363.  
  364.     assertCTFEable!(
  365.     {
  366.     foreach (S; TypeTuple!(string, wstring, dstring))
  367.     {
  368.         assert(indexOf(cast(S)null, cast(dchar)'a') == -1);
  369.         assert(indexOf(to!S("def"), cast(dchar)'a') == -1);
  370.         assert(indexOf(to!S("abba"), cast(dchar)'a') == 0);
  371.         assert(indexOf(to!S("def"), cast(dchar)'f') == 2);
  372.  
  373.         assert(indexOf(to!S("def"), cast(dchar)'a', CaseSensitive.no) == -1);
  374.         assert(indexOf(to!S("def"), cast(dchar)'a', CaseSensitive.no) == -1);
  375.         assert(indexOf(to!S("Abba"), cast(dchar)'a', CaseSensitive.no) == 0);
  376.         assert(indexOf(to!S("def"), cast(dchar)'F', CaseSensitive.no) == 2);
  377.         assert(indexOf(to!S("ödef"), 'ö', CaseSensitive.no) == 0);
  378.  
  379.         S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
  380.         assert(indexOf("def", cast(char)'f', CaseSensitive.no) == 2);
  381.         assert(indexOf(sPlts, cast(char)'P', CaseSensitive.no) == 23);
  382.         assert(indexOf(sPlts, cast(char)'R', CaseSensitive.no) == 2);
  383.     }
  384.  
  385.     foreach (cs; EnumMembers!CaseSensitive)
  386.     {
  387.         assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', cs) == 9);
  388.         assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', cs) == 7);
  389.         assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', cs) == 6);
  390.     }
  391.     });
  392. }
  393.  
  394. /++
  395.     Returns the index of the first occurrence of $(D c) in $(D s) with respect
  396.     to the start index $(D startIdx). If $(D c) is not found, then $(D -1) is
  397.     returned. If $(D c) is found the value of the returned index is at least
  398.     $(D startIdx). $(D startIdx) represents a codeunit index in $(D s). If the
  399.     sequence starting at $(D startIdx) does not represent a well formed codepoint,
  400.     then a $(XREF utf,UTFException) may be thrown.
  401.  
  402.     $(D cs) indicates whether the comparisons are case sensitive.
  403.   +/
  404. ptrdiff_t indexOf(S)(S s, ElementType!S c, const size_t startIdx,
  405.         CaseSensitive cs = CaseSensitive.yes) @safe pure
  406.     if (isSomeString!S||(isInputRange!S &&  isSomeChar!(ElementType!S)))
  407. {
  408.     static if(hasLenght!S)
  409.     {
  410.         if (startIdx >= s.length)return -1;
  411.         s.popFrontExactly(startIdx);
  412.     }
  413.     else
  414.     {
  415.         s.popFrontN(startIdx);
  416.     }
  417.     ptrdiff_t foundIdx = indexOf(s, c, cs);
  418.     if (foundIdx == -1)return-1;
  419.     return foundIdx + cast(ptrdiff_t)startIdx;
  420. }
  421.  
  422. unittest
  423. {
  424.     debug(string) printf("string.indexOf(startIdx).unittest\n");
  425.  
  426.     foreach (S; TypeTuple!(string, wstring, dstring))
  427.     {
  428.         assert(indexOf(cast(S)null, cast(dchar)'a', 1) == -1);
  429.         assert(indexOf(to!S("def"), cast(dchar)'a', 1) == -1);
  430.         assert(indexOf(to!S("abba"), cast(dchar)'a', 1) == 3);
  431.         assert(indexOf(to!S("def"), cast(dchar)'f', 1) == 2);
  432.  
  433.         assert((to!S("def")).indexOf(cast(dchar)'a', 1,
  434.                 CaseSensitive.no) == -1);
  435.         assert(indexOf(to!S("def"), cast(dchar)'a', 1,
  436.                 CaseSensitive.no) == -1);
  437.         assert(indexOf(to!S("def"), cast(dchar)'a', 12,
  438.                 CaseSensitive.no) == -1);
  439.         assert(indexOf(to!S("AbbA"), cast(dchar)'a', 2,
  440.                 CaseSensitive.no) == 3);
  441.         assert(indexOf(to!S("def"), cast(dchar)'F', 2, CaseSensitive.no) == 2);
  442.  
  443.         S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
  444.         assert(indexOf("def", cast(char)'f', cast(uint)2,
  445.             CaseSensitive.no) == 2);
  446.         assert(indexOf(sPlts, cast(char)'P', 12, CaseSensitive.no) == 23);
  447.         assert(indexOf(sPlts, cast(char)'R', cast(ulong)1,
  448.             CaseSensitive.no) == 2);
  449.     }
  450.  
  451.     foreach(cs; EnumMembers!CaseSensitive)
  452.     {
  453.         assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', 2, cs)
  454.             == 9);
  455.         assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', 3, cs)
  456.             == 7);
  457.         assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', 6, cs)
  458.             == 6);
  459.     }
  460. }
  461.  
  462. /++
  463.     Returns the index of the first occurrence of $(D sub) in $(D s). If $(D sub)
  464.     is not found, then $(D -1) is returned.
  465.  
  466.     $(D cs) indicates whether the comparisons are case sensitive.
  467.   +/
  468. ptrdiff_t indexOf(S1, S2)(S1 s,S2 sub,CaseSensitive cs = CaseSensitive.yes)
  469.     if (is(Unqual!(ElementType!S1)==Unqual!(ElementType!S2))
  470.         &&(isSomeString!S1||(isSomeChar!(ElementType!S1)
  471.         &&isForwardRange!S1&&isForwardRange!S2)))
  472. {
  473.     S1 balance;
  474.     if (cs == CaseSensitive.yes)
  475.     {
  476.         balance = std.algorithm.find(s, sub);
  477.     }
  478.     else
  479.     {
  480.         balance = std.algorithm.find!
  481.             ((a, b) => std.uni.toLower(a) == std.uni.toLower(b))
  482.             (s, sub);
  483.     }
  484.     if(balance.empty)return-1;
  485.     static if(is(S1==ElementType!S1[]))
  486.         return s.ptr-balance.ptr;
  487.     return indexOf(s,balance.front,cs);//TODO:implement a more efficient algorithm for this case
  488. }
  489.  
  490. unittest
  491. {
  492.     debug(string) printf("string.indexOf.unittest\n");
  493.  
  494.     assertCTFEable!(
  495.     {
  496.     foreach (S; TypeTuple!(string, wstring, dstring))
  497.     {
  498.         foreach (T; TypeTuple!(string, wstring, dstring))
  499.         {
  500.             assert(indexOf(cast(S)null, to!T("a")) == -1);
  501.             assert(indexOf(to!S("def"), to!T("a")) == -1);
  502.             assert(indexOf(to!S("abba"), to!T("a")) == 0);
  503.             assert(indexOf(to!S("def"), to!T("f")) == 2);
  504.             assert(indexOf(to!S("dfefffg"), to!T("fff")) == 3);
  505.             assert(indexOf(to!S("dfeffgfff"), to!T("fff")) == 6);
  506.  
  507.             assert(indexOf(to!S("dfeffgfff"), to!T("a"), CaseSensitive.no) == -1);
  508.             assert(indexOf(to!S("def"), to!T("a"), CaseSensitive.no) == -1);
  509.             assert(indexOf(to!S("abba"), to!T("a"), CaseSensitive.no) == 0);
  510.             assert(indexOf(to!S("def"), to!T("f"), CaseSensitive.no) == 2);
  511.             assert(indexOf(to!S("dfefffg"), to!T("fff"), CaseSensitive.no) == 3);
  512.             assert(indexOf(to!S("dfeffgfff"), to!T("fff"), CaseSensitive.no) == 6);
  513.  
  514.             S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
  515.             S sMars = "Who\'s \'My Favorite Maritian?\'";
  516.  
  517.             assert(indexOf(sMars, to!T("MY fAVe"), CaseSensitive.no) == -1);
  518.             assert(indexOf(sMars, to!T("mY fAVOriTe"), CaseSensitive.no) == 7);
  519.             assert(indexOf(sPlts, to!T("mArS:"), CaseSensitive.no) == 0);
  520.             assert(indexOf(sPlts, to!T("rOcK"), CaseSensitive.no) == 17);
  521.             assert(indexOf(sPlts, to!T("Un."), CaseSensitive.no) == 41);
  522.             assert(indexOf(sPlts, to!T(sPlts), CaseSensitive.no) == 0);
  523.  
  524.             assert(indexOf("\u0100", to!T("\u0100"), CaseSensitive.no) == 0);
  525.  
  526.             // Thanks to Carlos Santander B. and zwang
  527.             assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
  528.                            to!T("page-break-before"), CaseSensitive.no) == -1);
  529.         }
  530.  
  531.         foreach (cs; EnumMembers!CaseSensitive)
  532.         {
  533.             assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"), cs) == 9);
  534.             assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"), cs) == 7);
  535.             assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"), cs) == 6);
  536.         }
  537.     }
  538.     });
  539. }
  540.  
  541. /++
  542.     Returns the index of the first occurrence of $(D sub) in $(D s) with
  543.     respect to the start index $(D startIdx). If $(D sub) is not found, then
  544.     $(D -1) is returned. If $(D sub) is found the value of the returned index
  545.     is at least $(D startIdx). $(D startIdx) represents a codeunit index in
  546.     $(D s). If the sequence starting at $(D startIdx) does not represent a well
  547.     formed codepoint, then a $(XREF utf,UTFException) may be thrown.
  548.  
  549.     $(D cs) indicates whether the comparisons are case sensitive.
  550.   +/
  551. ptrdiff_t indexOf(S1, S2)(S1 s, S2 sub,
  552.         const size_t startIdx, CaseSensitive cs = CaseSensitive.yes)
  553.     if (isSomeString!S||(isForwardRange!S1
  554.         && isForwardRange!S2 && isSomeChar!(ElementType!S1)))
  555. {
  556.     static if(hasLenght!S1)
  557.     {
  558.         if (startIdx >= s.length)return -1;
  559.         s.popFrontExactly(startIdx);
  560.     }
  561.     else
  562.     {
  563.         s.popFrontN(startIdx);
  564.     }
  565.     ptrdiff_t foundIdx = indexOf(s,sub, cs);
  566.     if (foundIdx == -1)return-1;
  567.     return foundIdx + cast(ptrdiff_t)startIdx;
  568. }
  569.  
  570. unittest
  571. {
  572.     debug(string) printf("string.indexOf(startIdx).unittest\n");
  573.  
  574.     foreach(S; TypeTuple!(string, wstring, dstring))
  575.     {
  576.         foreach(T; TypeTuple!(string, wstring, dstring))
  577.         {
  578.             assert(indexOf(cast(S)null, to!T("a"), 1337) == -1);
  579.             assert(indexOf(to!S("def"), to!T("a"), 0) == -1);
  580.             assert(indexOf(to!S("abba"), to!T("a"), 2) == 3);
  581.             assert(indexOf(to!S("def"), to!T("f"), 1) == 2);
  582.             assert(indexOf(to!S("dfefffg"), to!T("fff"), 1) == 3);
  583.             assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 5) == 6);
  584.  
  585.             assert(indexOf(to!S("dfeffgfff"), to!T("a"), 1, CaseSensitive.no) == -1);
  586.             assert(indexOf(to!S("def"), to!T("a"), 2, CaseSensitive.no) == -1);
  587.             assert(indexOf(to!S("abba"), to!T("a"), 3, CaseSensitive.no) == 3);
  588.             assert(indexOf(to!S("def"), to!T("f"), 1, CaseSensitive.no) == 2);
  589.             assert(indexOf(to!S("dfefffg"), to!T("fff"), 2, CaseSensitive.no) == 3);
  590.             assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 4, CaseSensitive.no) == 6);
  591.             assert(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, CaseSensitive.no) == 9,
  592.                 to!string(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, CaseSensitive.no))
  593.                 ~ " " ~ S.stringof ~ " " ~ T.stringof);
  594.  
  595.             S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
  596.             S sMars = "Who\'s \'My Favorite Maritian?\'";
  597.  
  598.             assert(indexOf(sMars, to!T("MY fAVe"), 10,
  599.                 CaseSensitive.no) == -1);
  600.             assert(indexOf(sMars, to!T("mY fAVOriTe"), 4, CaseSensitive.no) == 7);
  601.             assert(indexOf(sPlts, to!T("mArS:"), 0, CaseSensitive.no) == 0);
  602.             assert(indexOf(sPlts, to!T("rOcK"), 12, CaseSensitive.no) == 17);
  603.             assert(indexOf(sPlts, to!T("Un."), 32, CaseSensitive.no) == 41);
  604.             assert(indexOf(sPlts, to!T(sPlts), 0, CaseSensitive.no) == 0);
  605.  
  606.             assert(indexOf("\u0100", to!T("\u0100"), 0, CaseSensitive.no) == 0);
  607.  
  608.             // Thanks to Carlos Santander B. and zwang
  609.             assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
  610.                            to!T("page-break-before"), 10, CaseSensitive.no) == -1);
  611.  
  612.             // In order for indexOf with and without index to be consistent
  613.             assert(indexOf(to!S(""), to!T("")) == indexOf(to!S(""), to!T(""), 0));
  614.         }
  615.  
  616.         foreach(cs; EnumMembers!CaseSensitive)
  617.         {
  618.             assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"),
  619.                 3, cs) == 9);
  620.             assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"),
  621.                 3, cs) == 7);
  622.             assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"),
  623.                 3, cs) == 6);
  624.         }
  625.     }
  626. }
  627.  
  628. /++
  629.     Returns the index of the last occurrence of $(D c) in $(D s). If $(D c)
  630.     is not found, then $(D -1) is returned.
  631.  
  632.     $(D cs) indicates whether the comparisons are case sensitive.
  633.   +/
  634. ptrdiff_t lastIndexOf(S)(S s,ElementType!S c,CaseSensitive cs = CaseSensitive.yes) @safe pure
  635.     if (isSomeString!S||(isBidirectionalRange!S
  636.         &&hasLenght!S&&isSomeChar!(ElementType!S)))
  637. {
  638.     uint i=indexOf(s.reverse(),c,cs);
  639.     return i==-1?-1:s.length-i-1;
  640. }
  641.  
  642. unittest
  643. {
  644.     debug(string) printf("string.lastIndexOf.unittest\n");
  645.  
  646.     assertCTFEable!(
  647.     {
  648.     foreach (S; TypeTuple!(string, wstring, dstring))
  649.     {
  650.         assert(lastIndexOf(cast(S) null, 'a') == -1);
  651.         assert(lastIndexOf(to!S("def"), 'a') == -1);
  652.         assert(lastIndexOf(to!S("abba"), 'a') == 3);
  653.         assert(lastIndexOf(to!S("def"), 'f') == 2);
  654.         assert(lastIndexOf(to!S("ödef"), 'ö') == 0);
  655.  
  656.         assert(lastIndexOf(cast(S) null, 'a', CaseSensitive.no) == -1);
  657.         assert(lastIndexOf(to!S("def"), 'a', CaseSensitive.no) == -1);
  658.         assert(lastIndexOf(to!S("AbbA"), 'a', CaseSensitive.no) == 3);
  659.         assert(lastIndexOf(to!S("def"), 'F', CaseSensitive.no) == 2);
  660.         assert(lastIndexOf(to!S("ödef"), 'ö', CaseSensitive.no) == 0);
  661.         assert(lastIndexOf(to!S("i\u0100def"), to!dchar("\u0100"),
  662.             CaseSensitive.no) == 1);
  663.  
  664.         S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
  665.  
  666.         assert(lastIndexOf(to!S("def"), 'f', CaseSensitive.no) == 2);
  667.         assert(lastIndexOf(sPlts, 'M', CaseSensitive.no) == 34);
  668.         assert(lastIndexOf(sPlts, 'S', CaseSensitive.no) == 40);
  669.     }
  670.  
  671.     foreach (cs; EnumMembers!CaseSensitive)
  672.     {
  673.         assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4);
  674.         assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2);
  675.         assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1);
  676.     }
  677.     });
  678. }
  679.  
  680. /++
  681.     Returns the index of the last occurrence of $(D c) in $(D s). If $(D c) is
  682.     not found, then $(D -1) is returned. The $(D startIdx) slices $(D s) in
  683.     the following way $(D s[0 .. startIdx]). $(D startIdx) represents a
  684.     codeunit index in $(D s). If the sequence ending at $(D startIdx) does not
  685.     represent a well formed codepoint, then a $(XREF utf,UTFException) may be
  686.     thrown.
  687.  
  688.     $(D cs) indicates whether the comparisons are case sensitive.
  689.   +/
  690. ptrdiff_t lastIndexOf(S)(S s, ElementType!S c, const size_t startIdx,
  691.         CaseSensitive cs = CaseSensitive.yes) @safe pure
  692.     if (isSomeString!S||(isBidirectionalRange!S && hasLenght!S && isSomeChar!S))
  693. {
  694.     uint i=indexOf(s.reverse(),c,startIdx,cs);
  695.     return i==-1?-1:s.length-i-1;
  696. }
  697.  
  698. unittest
  699. {
  700.     debug(string) printf("string.lastIndexOf.unittest\n");
  701.  
  702.     foreach(S; TypeTuple!(string, wstring, dstring))
  703.     {
  704.         assert(lastIndexOf(cast(S) null, 'a') == -1);
  705.         assert(lastIndexOf(to!S("def"), 'a') == -1);
  706.         assert(lastIndexOf(to!S("abba"), 'a', 3) == 0);
  707.         assert(lastIndexOf(to!S("deff"), 'f', 3) == 2);
  708.  
  709.         assert(lastIndexOf(cast(S) null, 'a', CaseSensitive.no) == -1);
  710.         assert(lastIndexOf(to!S("def"), 'a', CaseSensitive.no) == -1);
  711.         assert(lastIndexOf(to!S("AbbAa"), 'a', to!ushort(4), CaseSensitive.no) == 3,
  712.                 to!string(lastIndexOf(to!S("AbbAa"), 'a', 4, CaseSensitive.no)));
  713.         assert(lastIndexOf(to!S("def"), 'F', 3, CaseSensitive.no) == 2);
  714.  
  715.         S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
  716.  
  717.         assert(lastIndexOf(to!S("def"), 'f', 4, CaseSensitive.no) == -1);
  718.         assert(lastIndexOf(sPlts, 'M', sPlts.length -2, CaseSensitive.no) == 34);
  719.         assert(lastIndexOf(sPlts, 'S', sPlts.length -2, CaseSensitive.no) == 40);
  720.     }
  721.  
  722.     foreach(cs; EnumMembers!CaseSensitive)
  723.     {
  724.         assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4);
  725.         assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2);
  726.         assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1);
  727.     }
  728. }
  729.  
  730. /++
  731.     Returns the index of the last occurrence of $(D sub) in $(D s). If $(D sub)
  732.     is not found, then $(D -1) is returned.
  733.  
  734.     $(D cs) indicates whether the comparisons are case sensitive.
  735.   +/
  736. ptrdiff_t lastIndexOf(S1,S2)(S1 s,S2 sub,CaseSensitive cs = CaseSensitive.yes) @safe pure
  737.     if(isBidirectionalRange!S1 && isBidirectionalRange!S2
  738.         && isForwardRange!S1 && isForwardRange!S2 &&hasLenght!S1)
  739. {
  740.     uint i=indexOf(s.reverse(),sub.reverse(),cs);
  741.     return i==-1?-1:s.length-i-1;
  742. }
  743. unittest
  744. {
  745.     debug(string) printf("string.lastIndexOf.unittest\n");
  746.  
  747.     assertCTFEable!(
  748.     {
  749.     foreach (S; TypeTuple!(string, wstring, dstring))
  750.     {
  751.         foreach (T; TypeTuple!(string, wstring, dstring))
  752.         {
  753.             enum typeStr = S.stringof ~ " " ~ T.stringof;
  754.  
  755.             assert(lastIndexOf(cast(S)null, to!T("a")) == -1, typeStr);
  756.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("c")) == 6, typeStr);
  757.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd")) == 6, typeStr);
  758.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef")) == 8, typeStr);
  759.             assert(lastIndexOf(to!S("abcdefCdef"), to!T("c")) == 2, typeStr);
  760.             assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd")) == 2, typeStr);
  761.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("x")) == -1, typeStr);
  762.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy")) == -1, typeStr);
  763.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("")) == 10, typeStr);
  764.             assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö")) == 0, typeStr);
  765.  
  766.             assert(lastIndexOf(cast(S)null, to!T("a"), CaseSensitive.no) == -1, typeStr);
  767.             assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), CaseSensitive.no) == 6, typeStr);
  768.             assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), CaseSensitive.no) == 6, typeStr);
  769.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"), CaseSensitive.no) == -1, typeStr);
  770.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy"), CaseSensitive.no) == -1, typeStr);
  771.             assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), CaseSensitive.no) == 10, typeStr);
  772.             assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö"), CaseSensitive.no) == 0, typeStr);
  773.  
  774.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), CaseSensitive.no) == 6, typeStr);
  775.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), CaseSensitive.no) == 6, typeStr);
  776.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), CaseSensitive.no) == 7, typeStr);
  777.  
  778.             assert(lastIndexOf(to!S("ödfeffgfff"), to!T("ö"), CaseSensitive.yes) == 0);
  779.  
  780.             S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
  781.             S sMars = "Who\'s \'My Favorite Maritian?\'";
  782.  
  783.             assert(lastIndexOf(sMars, to!T("RiTE maR"), CaseSensitive.no) == 14, typeStr);
  784.             assert(lastIndexOf(sPlts, to!T("FOuRTh"), CaseSensitive.no) == 10, typeStr);
  785.             assert(lastIndexOf(sMars, to!T("whO\'s \'MY"), CaseSensitive.no) == 0, typeStr);
  786.             assert(lastIndexOf(sMars, to!T(sMars), CaseSensitive.no) == 0, typeStr);
  787.         }
  788.  
  789.         foreach (cs; EnumMembers!CaseSensitive)
  790.         {
  791.             enum csString = to!string(cs);
  792.  
  793.             assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), cs) == 4, csString);
  794.             assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), cs) == 2, csString);
  795.             assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), cs) == 1, csString);
  796.         }
  797.     }
  798.     });
  799. }
  800. unittest{}//TODO
  801. /++
  802.     Returns the index of the last occurrence of $(D sub) in $(D s). If $(D sub)
  803.     is not found, then $(D -1) is returned. The $(D startIdx) slices $(D s) in
  804.     the following way $(D s[0 .. startIdx]). $(D startIdx) represents a
  805.     codeunit index in $(D s). If the sequence ending at $(D startIdx) does not
  806.     represent a well formed codepoint, then a $(XREF utf,UTFException) may be
  807.     thrown.
  808.  
  809.     $(D cs) indicates whether the comparisons are case sensitive.
  810.   +/
  811. ptrdiff_t lastIndexOf(S1, S2)(S1 s, S2 sub,
  812.         const size_t startIdx, CaseSensitive cs = CaseSensitive.yes) @safe pure
  813.     if (isSomeString!S1||(isBidirectionalRange!S1 && isForwardRange!S2 && isForwardRange!S1))
  814. {
  815.     static if(hasLenght!S1)
  816.     {
  817.         if(str.length<=startIdx)return-1;
  818.         str.popBackExactly(startIdx);
  819.     }
  820.     else
  821.         str.popBackN(startIdx);
  822.     return lastIndexOf(s,sub,cs);
  823. }
  824.  
  825. unittest
  826. {
  827.     debug(string) printf("string.lastIndexOf.unittest\n");
  828.  
  829.     foreach(S; TypeTuple!(string, wstring, dstring))
  830.     {
  831.         foreach(T; TypeTuple!(string, wstring, dstring))
  832.         {
  833.             enum typeStr = S.stringof ~ " " ~ T.stringof;
  834.  
  835.             assert(lastIndexOf(cast(S)null, to!T("a")) == -1, typeStr);
  836.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 5) == 2, typeStr);
  837.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 3) == -1, typeStr);
  838.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6) == 4, typeStr ~
  839.                 format(" %u", lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6)));
  840.             assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5) == 2, typeStr);
  841.             assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd"), 3) == -1, typeStr);
  842.             assert(lastIndexOf(to!S("abcdefcdefx"), to!T("x"), 1) == -1, typeStr);
  843.             assert(lastIndexOf(to!S("abcdefcdefxy"), to!T("xy"), 6) == -1, typeStr);
  844.             assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 8) == 8, typeStr);
  845.             assert(lastIndexOf(to!S("öafö"), to!T("ö"), 3) == 0, typeStr ~
  846.                     to!string(lastIndexOf(to!S("öafö"), to!T("ö"), 3))); //BUG 10472
  847.  
  848.             assert(lastIndexOf(cast(S)null, to!T("a"), 1, CaseSensitive.no) == -1, typeStr);
  849.             assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5, CaseSensitive.no) == 2, typeStr);
  850.             assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 4, CaseSensitive.no) == 2, typeStr ~
  851.                 " " ~ to!string(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 3, CaseSensitive.no)));
  852.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"),3 , CaseSensitive.no) == -1, typeStr);
  853.             assert(lastIndexOf(to!S("abcdefcdefXY"), to!T("xy"), 4, CaseSensitive.no) == -1, typeStr);
  854.             assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 7, CaseSensitive.no) == 7, typeStr);
  855.  
  856.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 4, CaseSensitive.no) == 2, typeStr);
  857.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 4, CaseSensitive.no) == 2, typeStr);
  858.             assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), 6, CaseSensitive.no) == 3, typeStr);
  859.             assert(lastIndexOf(to!S(""), to!T(""), 0) == lastIndexOf(to!S(""), to!T("")), typeStr);
  860.         }
  861.  
  862.         foreach(cs; EnumMembers!CaseSensitive)
  863.         {
  864.             enum csString = to!string(cs);
  865.  
  866.             assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), 6, cs) == 4, csString);
  867.             assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), 6, cs) == 2, csString);
  868.             assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), 3, cs) == 1, csString);
  869.         }
  870.     }
  871. }
  872.  
  873.  
  874. /**
  875.  * Returns the representation of a string, which has the same type
  876.  * as the string except the character type is replaced by $(D ubyte),
  877.  * $(D ushort), or $(D uint) depending on the character width.
  878.  */
  879. auto representation(Char)(Char[] s) pure nothrow
  880.     if (isSomeChar!Char)
  881. {
  882.     // Get representation type
  883.     alias U = TypeTuple!(ubyte, ushort, uint)[Char.sizeof / 2];
  884.  
  885.     // const and immutable storage classes
  886.     static if (is(Char == immutable))
  887.         alias T = immutable(U);
  888.     else static if (is(Char == const))
  889.         alias T = const(U);
  890.     else
  891.         alias T = U;
  892.  
  893.     // shared storage class (because shared(const(T)) is possible)
  894.     static if (is(Char == shared))
  895.         alias ST = shared(T);
  896.     else
  897.         alias ST = T;
  898.  
  899.     return cast(ST[]) s;
  900. }
  901. ///ditto
  902. alias Representation(Range)=typeof([Range.init.front].representation());
  903. ///
  904. unittest
  905. {
  906.     string s = "hello";
  907.     static assert(is(typeof(representation(s)) == immutable(ubyte)[]));
  908.     assert(representation(s) is cast(immutable(ubyte)[]) s);
  909.     assert(representation(s) == [0x68, 0x65, 0x6c, 0x6c, 0x6f]);
  910. }
  911. unittest
  912. {
  913.     assertCTFEable!(
  914.     {
  915.     void test(Char, T)(Char[] str)
  916.     {
  917.         static assert(is(typeof(representation(str)) == T[]));
  918.         assert(representation(str) is cast(T[]) str);
  919.     }
  920.  
  921.     foreach (Type; TypeTuple!(Tuple!(char , ubyte ),
  922.                               Tuple!(wchar, ushort),
  923.                               Tuple!(dchar, uint  )))
  924.     {
  925.         alias Char = FieldTypeTuple!Type[0];
  926.         alias Int  = FieldTypeTuple!Type[1];
  927.         enum immutable(Char)[] hello = "hello";
  928.  
  929.         test!(   immutable Char,    immutable Int)(hello);
  930.         test!(       const Char,        const Int)(hello);
  931.         test!(             Char,              Int)(hello.dup);
  932.         test!(      shared Char,       shared Int)(cast(shared) hello.dup);
  933.         test!(const shared Char, const shared Int)(hello);
  934.     }
  935.     });
  936. }
  937.  
  938.  
  939. /++
  940.     Returns a string which is identical to $(D s) except that all of its
    characters are converted to lowercase (by performing Unicode lowercase mapping).
  942.     If none of $(D s) characters were affected, then $(D s) itself is returned.
  943.   +/
  944. alias toLower = std.uni.toLower;
  945. /++
  946.     Converts $(D s) to lowercase (by performing Unicode lowercase mapping) in place.
  947.     For a few characters string length may increase after the transformation,
  948.     in such a case the function reallocates exactly once.
  949.     If $(D s) does not have any uppercase characters, then $(D s) is unaltered.
  950.  +/
  951. alias toLowerInPlace = std.uni.toLowerInPlace;
  952.  
  953. /++
  954.     Returns a string which is identical to $(D s) except that all of its
    characters are converted to uppercase (by performing Unicode uppercase mapping).
  956.     If none of $(D s) characters were affected, then $(D s) itself is returned.
  957.   +/
  958. alias toUpper = std.uni.toUpper;
  959.  
  960. /++
  961.     Converts $(D s) to uppercase  (by performing Unicode uppercase mapping) in place.
  962.     For a few characters string length may increase after the transformation,
  963.     in such a case the function reallocates exactly once.
  964.     If $(D s) does not have any lowercase characters, then $(D s) is unaltered.
  965.  +/
  966. alias toUpperInPlace = std.uni.toUpperInPlace;
  967.  
  968. /++
  969.     Capitalize the first character of $(D s) and convert the rest of $(D s)
  970.     to lowercase.
  971.  +/
  972. S capitalize(S)(S s) @trusted pure
  973.     if (isSomeString!S)
  974. {
  975.     Unqual!(typeof(s[0]))[] retval;
  976.     bool changed = false;
  977.  
  978.     foreach (i, dchar c; s)
  979.     {
  980.         dchar c2;
  981.  
  982.         if (i == 0)
  983.         {
  984.             c2 = std.uni.toUpper(c);
  985.             if (c != c2)
  986.                 changed = true;
  987.         }
  988.         else
  989.         {
  990.             c2 = std.uni.toLower(c);
  991.             if (c != c2)
  992.             {
  993.                 if (!changed)
  994.                 {
  995.                     changed = true;
  996.                     retval = s[0 .. i].dup;
  997.                 }
  998.             }
  999.         }
  1000.  
  1001.         if (changed)
  1002.             std.utf.encode(retval, c2);
  1003.     }
  1004.  
  1005.     return changed ? cast(S)retval : s;
  1006. }
  1007.  
  1008. unittest
  1009. {
  1010.     assertCTFEable!(
  1011.     {
  1012.     foreach (S; TypeTuple!(string, wstring, dstring, char[], wchar[], dchar[]))
  1013.     {
  1014.         S s1 = to!S("FoL");
  1015.         S s2;
  1016.  
  1017.         s2 = capitalize(s1);
  1018.         assert(cmp(s2, "Fol") == 0);
  1019.         assert(s2 !is s1);
  1020.  
  1021.         s2 = capitalize(s1[0 .. 2]);
  1022.         assert(cmp(s2, "Fo") == 0);
  1023.         assert(s2.ptr == s1.ptr);
  1024.  
  1025.         s1 = to!S("fOl");
  1026.         s2 = capitalize(s1);
  1027.         assert(cmp(s2, "Fol") == 0);
  1028.         assert(s2 !is s1);
  1029.         s1 = to!S("\u0131 \u0130");
  1030.         s2 = capitalize(s1);
  1031.         assert(cmp(s2, "I \u0130") == 0);
  1032.         assert(s2 !is s1);
  1033.  
  1034.         s1 = to!S("\u017F \u0049");
  1035.         s2 = capitalize(s1);
  1036.         assert(cmp(s2, "\u0053 \u0069") == 0);
  1037.         assert(s2 !is s1);
  1038.     }
  1039.     });
  1040. }
  1041. ///ditto
  1042. struct Capitalize(R)
  1043. {
  1044. private:
  1045.     typeof(R.init.map!std.uni.toLower())rg;
  1046.     bool isFirst=true;
  1047. public:
  1048.     alias rg this;
  1049.     this(R r)pure
  1050.     {
  1051.         rg=r.map!std.uni.toLower();
  1052.     }
  1053.     @propriety auto front()
  1054.     {
  1055.         if(isFirst)return rg.front.toUpper();
  1056.         else return rg.front;
  1057.     }
  1058.     void popFront()
  1059.     {
  1060.         isFirst=false;
  1061.         rg.popFront();
  1062.     }
  1063.     static if(isRandomAccessRange!R)
  1064.     {
  1065.         auto opIndex(size_t i)
  1066.         {
  1067.             if(i==0)return rg.front.toUpper();
  1068.             else return rg[i];
  1069.         }
  1070.     }
  1071.     static if(hasSlicing!R)
  1072.         Capitalize!R opSlice(size_t i,size_t j)
  1073.         {
  1074.             Capitalize!R result;
  1075.             result.rg=rg[i..j];
  1076.             result.isFirst= i==0;
  1077.             return result;
  1078.         }
  1079. }
  1080. ///ditto
  1081. Capitalize!R capitalize(R)(R r)@safe pure if(isInputRange!R)
  1082. {
  1083.     return typeof(return)(r);
  1084. }
  1085. unittest{}//TODO
  1086. /++
  1087.     Split $(D s) into an array of lines using $(D '\r'), $(D '\n'),
  1088.     $(D "\r\n"), $(XREF uni, lineSep), and $(XREF uni, paraSep) as delimiters.
  1089.     If $(D keepTerm) is set to $(D KeepTerminator.yes), then the delimiter
  1090.     is included in the strings returned.
  1091.   +/
  1092. enum KeepTerminator : bool { no, yes }
  1093. /// ditto
  1094. S[] splitLines(S)(S s, KeepTerminator keepTerm = KeepTerminator.no) @safe pure
  1095.     if (isSomeString!S)
  1096. {
  1097.     size_t iStart = 0;
  1098.     size_t nextI = 0;
  1099.     auto retval = appender!(S[])();
  1100.  
  1101.     for (size_t i; i < s.length; i = nextI)
  1102.     {
  1103.         immutable c = decode(s, nextI);
  1104.  
  1105.         if (c == '\r' || c == '\n' || c == lineSep || c == paraSep)
  1106.         {
  1107.             immutable isWinEOL = c == '\r' && i + 1 < s.length && s[i + 1] == '\n';
  1108.             auto iEnd = i;
  1109.  
  1110.             if (keepTerm == KeepTerminator.yes)
  1111.             {
  1112.                 iEnd = isWinEOL? nextI + 1 : nextI;
  1113.             }
  1114.  
  1115.             retval.put(s[iStart .. iEnd]);
  1116.             iStart = nextI;
  1117.  
  1118.             if (isWinEOL)
  1119.             {
  1120.                 ++nextI;
  1121.                 ++iStart;
  1122.             }
  1123.         }
  1124.     }
  1125.  
  1126.     if (iStart != nextI)
  1127.         retval.put(s[iStart .. $]);
  1128.  
  1129.     return retval.data;
  1130. }
  1131.  
  1132. unittest
  1133. {
  1134.     debug(string) printf("string.splitLines.unittest\n");
  1135.  
  1136.     assertCTFEable!(
  1137.     {
  1138.     foreach (S; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
  1139.     {
  1140.         auto s = to!S("\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\n");
  1141.  
  1142.         auto lines = splitLines(s);
  1143.         assert(lines.length == 9);
  1144.         assert(lines[0] == "");
  1145.         assert(lines[1] == "peter");
  1146.         assert(lines[2] == "");
  1147.         assert(lines[3] == "paul");
  1148.         assert(lines[4] == "jerry");
  1149.         assert(lines[5] == "ice");
  1150.         assert(lines[6] == "cream");
  1151.         assert(lines[7] == "");
  1152.         assert(lines[8] == "sunday");
  1153.  
  1154.         lines = splitLines(s, KeepTerminator.yes);
  1155.         assert(lines.length == 9);
  1156.         assert(lines[0] == "\r");
  1157.         assert(lines[1] == "peter\n");
  1158.         assert(lines[2] == "\r");
  1159.         assert(lines[3] == "paul\r\n");
  1160.         assert(lines[4] == "jerry\u2028");
  1161.         assert(lines[5] == "ice\u2029");
  1162.         assert(lines[6] == "cream\n");
  1163.         assert(lines[7] == "\n");
  1164.         assert(lines[8] == "sunday\n");
  1165.  
  1166.         s.popBack(); // Lop-off trailing \n
  1167.         lines = splitLines(s);
  1168.         assert(lines.length == 9);
  1169.         assert(lines[8] == "sunday");
  1170.  
  1171.         lines = splitLines(s, KeepTerminator.yes);
  1172.         assert(lines.length == 9);
  1173.         assert(lines[8] == "sunday");
  1174.     }
  1175.     });
  1176. }
  1177. /+
  1178. ///ditto
  1179. auto splitLines(R)(R r,KeepTerminator keepTerm=KeepTerminator.no)@safe pure
  1180.     if(isInputRange!R&&hasLvalueElements!R)
  1181. {
  1182.     alias Ret=typeof(return);
  1183.     immutable nl="\n";//TODO:newline based on os' own newline
  1184.     static auto sanitize(Ror)(Ror ror,bool keep)
  1185.     {
  1186.         if(keep){
  1187.             static if(isBidirectionalRange!Ror&& isBidirectionalRange!Ret)
  1188.                 return ror.filterBidirectional!(a=>a!="")();
  1189.             else static if(!isBidirectionalRange!Ret)
  1190.                 return ror.filter!(a=>a!="")();
  1191.             else static assert(0,"can't give a bidirectional range from a non-bidirectional one")
  1192.         }
  1193.         else
  1194.         {
  1195.             if(hasLvalueElements!Ror)assert(0);
  1196.             typeof(ror.front)*prev;
  1197.             foreach(el;ror)
  1198.             {
  1199.                 if(el.empty)*prev~=nl;
  1200.                 else prev=&el;
  1201.             }
  1202.             return ror;
  1203.         }
  1204.     }
  1205.     return r.splitter(nl).sanitize(keepTerm==KeepTerminator.yes);
  1206. }
  1207. unittest{}//TODO
  1208. +/
  1209. /++
  1210.     Strips leading whitespace (as defined by $(XREF uni, isWhite)).
  1211.  
  1212.     Returns: $(D str) stripped of leading whitespace.
  1213.  
  1214.     Postconditions: $(D str) and the returned value
  1215.     will share the same tail (see $(XREF array, sameTail)).
  1216.   +/
  1217. auto stripLeft(S)(S str) @safe pure
  1218.     if (isInputRange!S &&isSomeChar!(ElementType!S))
  1219. {
  1220.     return str.stripLeft!std.uni.isWhite();
  1221. }
  1222.  
  1223. ///
  1224. @safe pure unittest
  1225. {
  1226.     assert(stripLeft("     hello world     ") ==
  1227.            "hello world     ");
  1228.     assert(stripLeft("\n\t\v\rhello world\n\t\v\r") ==
  1229.            "hello world\n\t\v\r");
  1230.     assert(stripLeft("hello world") ==
  1231.            "hello world");
  1232.     assert(stripLeft([lineSep] ~ "hello world" ~ lineSep) ==
  1233.            "hello world" ~ [lineSep]);
  1234.     assert(stripLeft([paraSep] ~ "hello world" ~ paraSep) ==
  1235.            "hello world" ~ [paraSep]);
  1236. }
  1237.  
  1238.  
  1239. /++
  1240.     Strips trailing whitespace (as defined by $(XREF uni, isWhite)).
  1241.  
  1242.     Returns: $(D str) stripped of trailing whitespace.
  1243.  
  1244.     Postconditions: $(D str) and the returned value
  1245.     will share the same head (see $(XREF array, sameHead)).
  1246.   +/
  1247. auto stripRight(S)(S str) @safe pure
  1248.     if (isBidirectionalRange!S&&isSomeChar!(ElementType!S))
  1249. {
  1250.     return str.stripRight!std.uni.isWhite();
  1251. }
  1252.  
  1253. ///
  1254. @safe pure unittest
  1255. {
  1256.     assert(stripRight("     hello world     ") ==
  1257.            "     hello world");
  1258.     assert(stripRight("\n\t\v\rhello world\n\t\v\r") ==
  1259.            "\n\t\v\rhello world");
  1260.     assert(stripRight("hello world") ==
  1261.            "hello world");
  1262.     assert(stripRight([lineSep] ~ "hello world" ~ lineSep) ==
  1263.            [lineSep] ~ "hello world");
  1264.     assert(stripRight([paraSep] ~ "hello world" ~ paraSep) ==
  1265.            [paraSep] ~ "hello world");
  1266. }
  1267.  
  1268.  
  1269. /++
  1270.     Strips both leading and trailing whitespace (as defined by
  1271.     $(XREF uni, isWhite)).
  1272.  
  1273.     Returns: $(D str) stripped of trailing whitespace.
  1274.   +/
  1275. auto strip(S)(S str) @safe pure
  1276.     if (isBidirectionalRange!S&&isSomeChar!(ElementType!S))
  1277. {
  1278.     return stripRight(stripLeft(str));
  1279. }
  1280.  
  1281. ///
  1282. @safe pure unittest
  1283. {
  1284.     assert(strip("     hello world     ") ==
  1285.            "hello world");
  1286.     assert(strip("\n\t\v\rhello world\n\t\v\r") ==
  1287.            "hello world");
  1288.     assert(strip("hello world") ==
  1289.            "hello world");
  1290.     assert(strip([lineSep] ~ "hello world" ~ [lineSep]) ==
  1291.            "hello world");
  1292.     assert(strip([paraSep] ~ "hello world" ~ [paraSep]) ==
  1293.            "hello world");
  1294. }
  1295.  
  1296. unittest
  1297. {
  1298.     debug(string) printf("string.strip.unittest\n");
  1299.  
  1300.     assertCTFEable!(
  1301.     {
  1302.     foreach (S; TypeTuple!( char[], const  char[],  string,
  1303.                            wchar[], const wchar[], wstring,
  1304.                            dchar[], const dchar[], dstring))
  1305.     {
  1306.         assert(equal(stripLeft(to!S("  foo\t ")), "foo\t "));
  1307.         assert(equal(stripLeft(to!S("\u2008  foo\t \u2007")), "foo\t \u2007"));
  1308.         assert(equal(stripLeft(to!S("\u0085 μ \u0085 \u00BB \r")), \u0085 \u00BB \r"));
  1309.         assert(equal(stripLeft(to!S("1")), "1"));
  1310.         assert(equal(stripLeft(to!S("\U0010FFFE")), "\U0010FFFE"));
  1311.         assert(equal(stripLeft(to!S("")), ""));
  1312.  
  1313.         assert(equal(stripRight(to!S("  foo\t ")), "  foo"));
  1314.         assert(equal(stripRight(to!S("\u2008  foo\t \u2007")), "\u2008  foo"));
  1315.         assert(equal(stripRight(to!S("\u0085 μ \u0085 \u00BB \r")), "\u0085 μ \u0085 \u00BB"));
  1316.         assert(equal(stripRight(to!S("1")), "1"));
  1317.         assert(equal(stripRight(to!S("\U0010FFFE")), "\U0010FFFE"));
  1318.         assert(equal(stripRight(to!S("")), ""));
  1319.  
  1320.         assert(equal(strip(to!S("  foo\t ")), "foo"));
  1321.         assert(equal(strip(to!S("\u2008  foo\t \u2007")), "foo"));
  1322.         assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB \r")), \u0085 \u00BB"));
  1323.         assert(equal(strip(to!S("\U0010FFFE")), "\U0010FFFE"));
  1324.         assert(equal(strip(to!S("")), ""));
  1325.     }
  1326.     });
  1327. }
  1328.  
  1329. @safe pure unittest
  1330. {
  1331.     assertCTFEable!(
  1332.     {
  1333.     wstring s = " ";
  1334.     assert(s.sameTail(s.stripLeft()));
  1335.     assert(s.sameHead(s.stripRight()));
  1336.     });
  1337. }
  1338.  
  1339.  
  1340. /++
  1341.     If $(D str) ends with $(D delimiter), then $(D str) is returned without
  1342.     $(D delimiter) on its end. If it $(D str) does $(I not) end with
  1343.     $(D delimiter), then it is returned unchanged.
  1344.  
  1345.     If no $(D delimiter) is given, then one trailing  $(D '\r'), $(D '\n'),
  1346.     $(D "\r\n"), $(XREF uni, lineSep), or $(XREF uni, paraSep) is removed from
  1347.     the end of $(D str). If $(D str) does not end with any of those characters,
  1348.     then it is returned unchanged.
  1349.   +/
  1350. S chomp(S)(S str) @safe pure
  1351.     if (isBidirectionalRange!S && isSomeChar!(ElementType!S))
  1352. {
  1353.     if (str.empty)
  1354.         return str;
  1355.    
  1356.     switch (str.back)
  1357.     {
  1358.         case '\n':
  1359.         {
  1360.             str.popBack();
  1361.             if(str.back!='\r')return str;
  1362.             goto case;
  1363.         }
  1364.         case '\r':
  1365.         {
  1366.             str.popBack();
  1367.             return str;
  1368.         }
  1369.  
  1370.         //Pops off the last character if it's lineSep or paraSep.
  1371.         static if (is(ElementType!S : const char)&&isForwardRange!S)
  1372.         {
  1373.             //In UTF-8, lineSep and paraSep are [226, 128, 168], and
  1374.             //[226, 128, 169] respectively, so their first two bytes are the same.
  1375.             case 168: //Last byte of lineSep
  1376.             case 169: //Last byte of paraSep
  1377.             {
  1378.                 S orig=str.save;
  1379.                 str.popBack();
  1380.                 if(str.back==128)
  1381.                 {
  1382.                     str.popBack();
  1383.                     if(str.back==266){
  1384.                         str.popBack();
  1385.                         return str;
  1386.                     }
  1387.                 }
  1388.                 return orig;
  1389.             }
  1390.         }
  1391.         else
  1392.         {
  1393.             case lineSep:
  1394.             case paraSep:
  1395.                 goto case '\r';
  1396.         }
  1397.         default:
  1398.             return str;
  1399.     }
  1400. }
  1401.  
  1402. /// Ditto
  1403. S1 chomp(S1, S2)(S1 str, S2 delimiter) @safe pure
  1404.     if (isSomeChar!(ElementType!S1) && isSomeChar!(ElementType!S2)&&
  1405.     isBidirectionalRange!S1&&isBidirectionalRange!S2
  1406.     &&is(Unqual!(ElementType!S1) == Unqual!(ElementType!S2))&&hasLenght!S2)
  1407. {
  1408.     if (delimiter.empty)
  1409.         return chomp(str);
  1410.  
  1411.     if (str.endsWith(delimiter))
  1412.         str.popBackExactly(delimiter.length);
  1413.     return str;
  1414. }
  1415.  
  1416. /// Ditto
  1417. S1 chomp(S1, S2)(S1 str, S2 delimiter) @safe pure
  1418.     if (isSomeChar!(ElementType!S1) && isSomeChar!(ElementType!S2)&&
  1419.     isBidirectionalRange!S1&&isBidirectionalRange!S2&&isForwardRange!S1
  1420.     (!is(Unqual!(ElementType!S1) == Unqual!(ElementType!S2))|| !hasLenght!S2) )
  1421. {
  1422.     if (delimiter.empty)
  1423.         return chomp(str);
  1424.     auto orig = str.save;
  1425.     foreach_reverse (dchar c; delimiter)
  1426.     {
  1427.         if (str.empty || str.back != c)
  1428.         {
  1429.             return orig;
  1430.         }
  1431.         str.popBack();
  1432.     }
  1433.     return str;
  1434.     }
  1435.  
  1436. ///
  1437. @safe pure unittest
  1438. {
  1439.     assert(chomp(" hello world  \n\r") == " hello world  \n");
  1440.     assert(chomp(" hello world  \r\n") == " hello world  ");
  1441.     assert(chomp(" hello world  \n\n") == " hello world  \n");
  1442.     assert(chomp(" hello world  \n\n ") == " hello world  \n\n ");
  1443.     assert(chomp(" hello world  \n\n" ~ [lineSep]) == " hello world  \n\n");
  1444.     assert(chomp(" hello world  \n\n" ~ [paraSep]) == " hello world  \n\n");
  1445.     assert(chomp(" hello world") == " hello world");
  1446.     assert(chomp("") == "");
  1447.  
  1448.     assert(chomp(" hello world", "orld") == " hello w");
  1449.     assert(chomp(" hello world", " he") == " hello world");
  1450.     assert(chomp("", "hello") == "");
  1451.  
  1452.     // Don't decode pointlessly
  1453.     assert(chomp("hello\xFE", "\r") == "hello\xFE");
  1454. }
  1455.  
  1456. unittest
  1457. {
  1458.     debug(string) printf("string.chomp.unittest\n");
  1459.     string s;
  1460.  
  1461.     assertCTFEable!(
  1462.     {
  1463.     foreach (S; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
  1464.     {
  1465.         // @@@ BUG IN COMPILER, MUST INSERT CAST
  1466.         assert(chomp(cast(S)null) is null);
  1467.         assert(chomp(to!S("hello")) == "hello");
  1468.         assert(chomp(to!S("hello\n")) == "hello");
  1469.         assert(chomp(to!S("hello\r")) == "hello");
  1470.         assert(chomp(to!S("hello\r\n")) == "hello");
  1471.         assert(chomp(to!S("hello\n\r")) == "hello\n");
  1472.         assert(chomp(to!S("hello\n\n")) == "hello\n");
  1473.         assert(chomp(to!S("hello\r\r")) == "hello\r");
  1474.         assert(chomp(to!S("hello\nxxx\n")) == "hello\nxxx");
  1475.         assert(chomp(to!S("hello\u2028")) == "hello");
  1476.         assert(chomp(to!S("hello\u2029")) == "hello");
  1477.         assert(chomp(to!S("hello\u2028\u2028")) == "hello\u2028");
  1478.         assert(chomp(to!S("hello\u2029\u2029")) == "hello\u2029");
  1479.  
  1480.         foreach (T; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
  1481.         {
  1482.             // @@@ BUG IN COMPILER, MUST INSERT CAST
  1483.             assert(chomp(cast(S)null, cast(T)null) is null);
  1484.             assert(chomp(to!S("hello\n"), cast(T)null) == "hello");
  1485.             assert(chomp(to!S("hello"), to!T("o")) == "hell");
  1486.             assert(chomp(to!S("hello"), to!T("p")) == "hello");
  1487.             // @@@ BUG IN COMPILER, MUST INSERT CAST
  1488.             assert(chomp(to!S("hello"), cast(T) null) == "hello");
  1489.             assert(chomp(to!S("hello"), to!T("llo")) == "he");
  1490.             assert(chomp(to!S("\uFF28ello"), to!T("llo")) == "\uFF28e");
  1491.             assert(chomp(to!S("\uFF28el\uFF4co"), to!T("l\uFF4co")) == "\uFF28e");
  1492.         }
  1493.     }
  1494.     });
  1495. }
  1496.  
  1497.  
  1498. /++
  1499.     If $(D str) starts with $(D delimiter), then the part of $(D str) following
  1500.     $(D delimiter) is returned. If it $(D str) does $(I not) start with
  1501.     $(D delimiter), then it is returned unchanged.
  1502.  +/
  1503. S1 chompPrefix(S1,S2)(S1 str, S2 delimiter) @safe pure
  1504.     if (isInputRange!S1 && isInputRange!S2 && hasLenght!S2
  1505.         &&is(Unqual!(ElementType!S1) == Unqual!(ElementType!S2)))
  1506. {
  1507.     if (str.startsWith(delimiter))
  1508.         str.popFrontExactly(delimiter.length);
  1509.     return str;
  1510. }
  1511. ///ditto
  1512. S1 chompPrefix(S1,S2)(S1 str, S2 delimiter) @safe pure
  1513.     if (isSomeChar!(ElementType!S1) && isSomeChar!(ElementType!S2) &&
  1514.      isInputRange!S2 && isForwardRange!S1)
  1515. {
  1516.     S1 orig = str.save;
  1517.     size_t index = 0;
  1518.  
  1519.     foreach (dchar c; delimiter)
  1520.     {
  1521.         if (index >= str.length || decode(str, index) != c)
  1522.             return orig;
  1523.     }
  1524.     str.popFrontExactly(index);
  1525.     return str;
  1526. }
  1527. ///
  1528. @safe pure unittest
  1529. {
  1530.     assert(chompPrefix("hello world", "he") == "llo world");
  1531.     assert(chompPrefix("hello world", "hello w") == "orld");
  1532.     assert(chompPrefix("hello world", " world") == "hello world");
  1533.     assert(chompPrefix("", "hello") == "");
  1534. }
  1535.  
  1536. /* @safe */ pure unittest
  1537. {
  1538.     assertCTFEable!(
  1539.     {
  1540.     foreach (S; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
  1541.     {
  1542.         foreach (T; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
  1543.         {
  1544.             assert(equal(chompPrefix(to!S("abcdefgh"), to!T("abcde")), "fgh"));
  1545.             assert(equal(chompPrefix(to!S("abcde"), to!T("abcdefgh")), "abcde"));
  1546.             assert(equal(chompPrefix(to!S("\uFF28el\uFF4co"), to!T("\uFF28el\uFF4co")), ""));
  1547.             assert(equal(chompPrefix(to!S("\uFF28el\uFF4co"), to!T("\uFF28el")), "\uFF4co"));
  1548.             assert(equal(chompPrefix(to!S("\uFF28el"), to!T("\uFF28el\uFF4co")), "\uFF28el"));
  1549.         }
  1550.     }
  1551.     });
  1552. }
  1553.  
  1554.  
  1555. /++
  1556.     Returns $(D str) without its last character, if there is one. If $(D str)
  1557.     ends with $(D "\r\n"), then both are removed. If $(D str) is empty, then
  1558.     then it is returned unchanged.
  1559.  +/
  1560. S chop(S)(S str) @safe pure
  1561.     if (isBidirectionalRange!S &&isSomeChar!(ElementType!S))
  1562. {
  1563.     if (str.empty)
  1564.         return str;
  1565.     auto last=str.back;
  1566.     str.popBack();
  1567.     if(last=='\n'&&str.back=='\r')str.popBack();
  1568.     return str;
  1569. }
  1570.  
  1571. ///
  1572. @safe pure unittest
  1573. {
  1574.     assert(chop("hello world") == "hello worl");
  1575.     assert(chop("hello world\n") == "hello world");
  1576.     assert(chop("hello world\r") == "hello world");
  1577.     assert(chop("hello world\n\r") == "hello world\n");
  1578.     assert(chop("hello world\r\n") == "hello world");
  1579.     assert(chop("Walter Bright") == "Walter Brigh");
  1580.     assert(chop("") == "");
  1581. }
  1582.  
  1583. unittest
  1584. {
  1585.     debug(string) printf("string.chop.unittest\n");
  1586.  
  1587.     assertCTFEable!(
  1588.     {
  1589.     foreach (S; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
  1590.     {
  1591.         assert(chop(cast(S) null) is null);
  1592.         assert(equal(chop(to!S("hello")), "hell"));
  1593.         assert(equal(chop(to!S("hello\r\n")), "hello"));
  1594.         assert(equal(chop(to!S("hello\n\r")), "hello\n"));
  1595.         assert(equal(chop(to!S("Verité")), "Verit"));
  1596.         assert(equal(chop(to!S(`さいごの果実`)), "さいごの果"));
  1597.         assert(equal(chop(to!S(`ミツバチと科学者`)), "ミツバチと科学"));
  1598.     }
  1599.     });
  1600. }
  1601.  
  1602.  
  1603. /++
  1604.     Left justify $(D s) in a field $(D width) characters wide. $(D fillChar)
  1605.     is the character that will be used to fill up the space in the field that
  1606.     $(D s) doesn't fill.
  1607.   +/
  1608. S leftJustify(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure
  1609.     if (isSomeString!S)
  1610. {
  1611.     alias C = ElementEncodingType!S;
  1612.  
  1613.     if (canSearchInCodeUnits!C(fillChar))
  1614.     {
  1615.         immutable len = s.walkLength();
  1616.         if (len >= width)
  1617.             return s;
  1618.  
  1619.         auto retval = new Unqual!(C)[width - len + s.length];
  1620.         retval[0 .. s.length] = s[];
  1621.         retval[s.length .. $] = cast(C)fillChar;
  1622.         return cast(S)retval;
  1623.     }
  1624.     else
  1625.     {
  1626.         auto dstr = to!dstring(s);
  1627.         if (dstr.length >= width)
  1628.             return s;
  1629.  
  1630.         auto retval = new dchar[](width);
  1631.         retval[0 .. dstr.length] = dstr[];
  1632.         retval[dstr.length .. $] = fillChar;
  1633.         return to!S(retval);
  1634.     }
  1635. }
  1636.  
  1637.  
  1638. /++
  1639.     Right justify $(D s) in a field $(D width) characters wide. $(D fillChar)
  1640.     is the character that will be used to fill up the space in the field that
  1641.     $(D s) doesn't fill.
  1642.   +/
  1643. S rightJustify(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure
  1644.     if (isSomeString!S)
  1645. {
  1646.     alias C = ElementEncodingType!S;
  1647.  
  1648.     if (canSearchInCodeUnits!C(fillChar))
  1649.     {
  1650.         immutable len = s.walkLength();
  1651.         if (len >= width)
  1652.             return s;
  1653.  
  1654.         auto retval = new Unqual!C[width - len + s.length];
  1655.         retval[0 .. $ - s.length] = cast(C)fillChar;
  1656.         retval[$ - s.length .. $] = s[];
  1657.         return cast(S)retval;
  1658.     }
  1659.     else
  1660.     {
  1661.         auto dstr = to!dstring(s);
  1662.         if (dstr.length >= width)
  1663.             return s;
  1664.  
  1665.         auto retval = new dchar[](width);
  1666.         retval[0 .. $ - dstr.length] = fillChar;
  1667.         retval[$ - dstr.length .. $] = dstr[];
  1668.         return to!S(retval);
  1669.     }
  1670. }
  1671.  
  1672.  
  1673. /++
  1674.     Center $(D s) in a field $(D width) characters wide. $(D fillChar)
  1675.     is the character that will be used to fill up the space in the field that
  1676.     $(D s) doesn't fill.
  1677.   +/
  1678. S center(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure
  1679.     if (isSomeString!S)
  1680. {
  1681.     alias C = ElementEncodingType!S;
  1682.  
  1683.     if (canSearchInCodeUnits!C(fillChar))
  1684.     {
  1685.         immutable len = s.walkLength();
  1686.         if (len >= width)
  1687.             return s;
  1688.  
  1689.         auto retval = new Unqual!C[width - len + s.length];
  1690.         immutable left = (retval.length - s.length) / 2;
  1691.         retval[0 .. left] = cast(C)fillChar;
  1692.         retval[left .. left + s.length] = s[];
  1693.         retval[left + s.length .. $] = cast(C)fillChar;
  1694.         return to!S(retval);
  1695.     }
  1696.     else
  1697.     {
  1698.         auto dstr = to!dstring(s);
  1699.         if (dstr.length >= width)
  1700.             return s;
  1701.  
  1702.         auto retval = new dchar[](width);
  1703.         immutable left = (retval.length - dstr.length) / 2;
  1704.         retval[0 .. left] = fillChar;
  1705.         retval[left .. left + dstr.length] = dstr[];
  1706.         retval[left + dstr.length .. $] = fillChar;
  1707.         return to!S(retval);
  1708.     }
  1709. }
  1710.  
  1711. unittest
  1712. {
  1713.     debug(string) printf("string.justify.unittest\n");
  1714.  
  1715.     assertCTFEable!(
  1716.     {
  1717.     foreach (S; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
  1718.     {
  1719.         S s = to!S("hello");
  1720.  
  1721.         assert(leftJustify(s, 2) == "hello");
  1722.         assert(rightJustify(s, 2) == "hello");
  1723.         assert(center(s, 2) == "hello");
  1724.  
  1725.         assert(leftJustify(s, 7) == "hello  ");
  1726.         assert(rightJustify(s, 7) == "  hello");
  1727.         assert(center(s, 7) == " hello ");
  1728.  
  1729.         assert(leftJustify(s, 8) == "hello   ");
  1730.         assert(rightJustify(s, 8) == "   hello");
  1731.         assert(center(s, 8) == " hello  ");
  1732.  
  1733.         assert(leftJustify(s, 8, '\u0100') == "hello\u0100\u0100\u0100");
  1734.         assert(rightJustify(s, 8, '\u0100') == "\u0100\u0100\u0100hello");
  1735.         assert(center(s, 8, '\u0100') == "\u0100hello\u0100\u0100");
  1736.  
  1737.         assert(leftJustify(s, 8, 'ö') == "helloööö");
  1738.         assert(rightJustify(s, 8, 'ö') == "öööhello");
  1739.         assert(center(s, 8, 'ö') == "öhelloöö");
  1740.     }
  1741.     });
  1742. }
  1743.  
  1744.  
  1745. /++
  1746.     Replace each tab character in $(D s) with the number of spaces necessary
  1747.     to align the following character at the next tab stop where $(D tabSize)
  1748.     is the distance between tab stops.
  1749.   +/
  1750. S detab(S)(S s, size_t tabSize = 8) @trusted pure
  1751.     if (isSomeString!S)
  1752. {
  1753.     assert(tabSize > 0);
  1754.     alias C = Unqual!(typeof(s[0]));
  1755.     bool changes = false;
  1756.     C[] result;
  1757.     int column;
  1758.     size_t nspaces;
  1759.  
  1760.     foreach (size_t i, dchar c; s)
  1761.     {
  1762.         switch (c)
  1763.         {
  1764.         case '\t':
  1765.             nspaces = tabSize - (column % tabSize);
  1766.             if (!changes)
  1767.             {
  1768.                 changes = true;
  1769.                 result = null;
  1770.                 result.length = s.length + nspaces - 1;
  1771.                 result.length = i + nspaces;
  1772.                 result[0 .. i] = s[0 .. i];
  1773.                 result[i .. i + nspaces] = ' ';
  1774.             }
  1775.             else
  1776.             {
  1777.                 ptrdiff_t j = result.length;
  1778.                 result.length = j + nspaces;
  1779.                 result[j .. j + nspaces] = ' ';
  1780.             }
  1781.             column += nspaces;
  1782.             break;
  1783.  
  1784.         case '\r':
  1785.         case '\n':
  1786.         case paraSep:
  1787.         case lineSep:
  1788.             column = 0;
  1789.             goto L1;
  1790.  
  1791.         default:
  1792.             column++;
  1793.         L1:
  1794.             if (changes)
  1795.             {
  1796.                 std.utf.encode(result, c);
  1797.             }
  1798.             break;
  1799.         }
  1800.     }
  1801.  
  1802.     return changes ? cast(S) result : s;
  1803. }
  1804.  
  1805. unittest
  1806. {
  1807.     debug(string) printf("string.detab.unittest\n");
  1808.  
  1809.     assertCTFEable!(
  1810.     {
  1811.     foreach (S; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
  1812.     {
  1813.         S s = to!S("This \tis\t a fofof\tof list");
  1814.         assert(cmp(detab(s), "This    is       a fofof        of list") == 0);
  1815.  
  1816.         assert(detab(cast(S)null) is null);
  1817.         assert(detab("").empty);
  1818.         assert(detab("a") == "a");
  1819.         assert(detab("\t") == "        ");
  1820.         assert(detab("\t", 3) == "   ");
  1821.         assert(detab("\t", 9) == "         ");
  1822.         assert(detab(  "  ab\t asdf ") == "  ab     asdf ");
  1823.         assert(detab(  "  \U00010000b\tasdf ") == "  \U00010000b    asdf ");
  1824.     }
  1825.     });
  1826. }
  1827.  
  1828. /++
  1829.     Replaces spaces in $(D s) with the optimal number of tabs.
  1830.     All spaces and tabs at the end of a line are removed.
  1831.  
  1832.     Params:
  1833.         s       = String to convert.
  1834.         tabSize = Tab columns are $(D tabSize) spaces apart.
  1835.  +/
  1836. S entab(S)(S s, size_t tabSize = 8) @trusted pure
  1837.     if (isSomeString!S)
  1838. {
  1839.     bool changes = false;
  1840.     alias C = Unqual!(typeof(s[0]));
  1841.     C[] result;
  1842.  
  1843.     int nspaces = 0;
  1844.     int nwhite = 0;
  1845.     size_t column = 0;         // column number
  1846.  
  1847.     foreach (size_t i, dchar c; s)
  1848.     {
  1849.  
  1850.         void change()
  1851.         {
  1852.             changes = true;
  1853.             result = null;
  1854.             result.length = s.length;
  1855.             result.length = i;
  1856.             result[0 .. i] = s[0 .. i];
  1857.         }
  1858.  
  1859.         switch (c)
  1860.         {
  1861.         case '\t':
  1862.             nwhite++;
  1863.             if (nspaces)
  1864.             {
  1865.                 if (!changes)
  1866.                     change();
  1867.  
  1868.                 ptrdiff_t j = result.length - nspaces;
  1869.                 auto ntabs = (((column - nspaces) % tabSize) + nspaces) / tabSize;
  1870.                 result.length = j + ntabs;
  1871.                 result[j .. j + ntabs] = '\t';
  1872.                 nwhite += ntabs - nspaces;
  1873.                 nspaces = 0;
  1874.             }
  1875.             column = (column + tabSize) / tabSize * tabSize;
  1876.             break;
  1877.  
  1878.         case '\r':
  1879.         case '\n':
  1880.         case paraSep:
  1881.         case lineSep:
  1882.             // Truncate any trailing spaces or tabs
  1883.             if (nwhite)
  1884.             {
  1885.                 if (!changes)
  1886.                     change();
  1887.                 result = result[0 .. result.length - nwhite];
  1888.             }
  1889.             break;
  1890.  
  1891.         default:
  1892.             if (nspaces >= 2 && (column % tabSize) == 0)
  1893.             {
  1894.                 if (!changes)
  1895.                     change();
  1896.  
  1897.                 auto j = result.length - nspaces;
  1898.                 auto ntabs = (nspaces + tabSize - 1) / tabSize;
  1899.                 result.length = j + ntabs;
  1900.                 result[j .. j + ntabs] = '\t';
  1901.                 nwhite += ntabs - nspaces;
  1902.                 nspaces = 0;
  1903.             }
  1904.             if (c == ' ')
  1905.             {   nwhite++;
  1906.                 nspaces++;
  1907.             }
  1908.             else
  1909.             {   nwhite = 0;
  1910.                 nspaces = 0;
  1911.             }
  1912.             column++;
  1913.             break;
  1914.         }
  1915.         if (changes)
  1916.         {
  1917.             std.utf.encode(result, c);
  1918.         }
  1919.     }
  1920.  
  1921.     // Truncate any trailing spaces or tabs
  1922.     if (nwhite)
  1923.     {
  1924.         if (changes)
  1925.             result = result[0 .. result.length - nwhite];
  1926.         else
  1927.             s = s[0 .. s.length - nwhite];
  1928.     }
  1929.     return changes ? assumeUnique(result) : s;
  1930. }
  1931.  
  1932. unittest
  1933. {
  1934.     debug(string) printf("string.entab.unittest\n");
  1935.  
  1936.     assertCTFEable!(
  1937.     {
  1938.     assert(entab(cast(string) null) is null);
  1939.     assert(entab("").empty);
  1940.     assert(entab("a") == "a");
  1941.     assert(entab("        ") == "");
  1942.     assert(entab("        x") == "\tx");
  1943.     assert(entab("  ab    asdf ") == "  ab\tasdf");
  1944.     assert(entab("  ab     asdf ") == "  ab\t asdf");
  1945.     assert(entab("  ab \t   asdf ") == "  ab\t   asdf");
  1946.     assert(entab("1234567 \ta") == "1234567\t\ta");
  1947.     assert(entab("1234567  \ta") == "1234567\t\ta");
  1948.     assert(entab("1234567   \ta") == "1234567\t\ta");
  1949.     assert(entab("1234567    \ta") == "1234567\t\ta");
  1950.     assert(entab("1234567     \ta") == "1234567\t\ta");
  1951.     assert(entab("1234567      \ta") == "1234567\t\ta");
  1952.     assert(entab("1234567       \ta") == "1234567\t\ta");
  1953.     assert(entab("1234567        \ta") == "1234567\t\ta");
  1954.     assert(entab("1234567         \ta") == "1234567\t\t\ta");
  1955.  
  1956.     assert(entab("a               ") == "a");
  1957.     assert(entab("a\v") == "a\v");
  1958.     assert(entab("a\f") == "a\f");
  1959.     assert(entab("a\n") == "a\n");
  1960.     assert(entab("a\n\r") == "a\n\r");
  1961.     assert(entab("a\r\n") == "a\r\n");
  1962.     assert(entab("a\u2028") == "a\u2028");
  1963.     assert(entab("a\u2029") == "a\u2029");
  1964.     assert(entab("a  ") == "a");
  1965.     assert(entab("a\t") == "a");
  1966.     assert(entab("\uFF28\uFF45\uFF4C\uFF4C567      \t\uFF4F \t") ==
  1967.                  "\uFF28\uFF45\uFF4C\uFF4C567\t\t\uFF4F");
  1968.     });
  1969. }
  1970.  
  1971.  
  1972. /++
  1973.     Replaces the characters in $(D str) which are keys in $(D transTable) with
  1974.     their corresponding values in $(D transTable). $(D transTable) is an AA
  1975.     where its keys are $(D dchar) and its values are either $(D dchar) or some
  1976.     type of string. Also, if $(D toRemove) is given, the characters in it are
  1977.     removed from $(D str) prior to translation. $(D str) itself is unaltered.
  1978.     A copy with the changes is returned.
  1979.  
  1980.     See_Also:
  1981.         $(LREF tr)
  1982.         $(XREF array, replace)
  1983.  
  1984.     Params:
  1985.         str        = The original string.
  1986.         transTable = The AA indicating which characters to replace and what to
  1987.                      replace them with.
  1988.         toRemove   = The characters to remove from the string.
  1989.   +/
  1990. C1[] translate(C1, C2 = immutable char)(C1[] str,
  1991.                                         dchar[dchar] transTable,
  1992.                                         const(C2)[] toRemove = null) @safe pure
  1993.     if (isSomeChar!C1 && isSomeChar!C2)
  1994. {
  1995.     auto buffer = appender!(C1[])();
  1996.     translateImpl(str, transTable, toRemove, buffer);
  1997.     return buffer.data;
  1998. }
  1999.  
  2000. ///
  2001. @safe pure unittest
  2002. {
  2003.     dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q'];
  2004.     assert(translate("hello world", transTable1) == "h5ll7 w7rld");
  2005.  
  2006.     assert(translate("hello world", transTable1, "low") == "h5 rd");
  2007.  
  2008.     string[dchar] transTable2 = ['e' : "5", 'o' : "orange"];
  2009.     assert(translate("hello world", transTable2) == "h5llorange worangerld");
  2010. }
  2011.  
  2012. /* @safe */ pure unittest
  2013. {
  2014.     assertCTFEable!(
  2015.     {
  2016.     foreach (S; TypeTuple!( char[], const( char)[], immutable( char)[],
  2017.                            wchar[], const(wchar)[], immutable(wchar)[],
  2018.                            dchar[], const(dchar)[], immutable(dchar)[]))
  2019.     {
  2020.         assert(translate(to!S("hello world"), cast(dchar[dchar])['h' : 'q', 'l' : '5']) ==
  2021.                to!S("qe55o wor5d"));
  2022.         assert(translate(to!S("hello world"), cast(dchar[dchar])['o' : 'l', 'l' : '\U00010143']) ==
  2023.                to!S("he\U00010143\U00010143l wlr\U00010143d"));
  2024.         assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['h' : 'q', 'l': '5']) ==
  2025.                to!S("qe55o \U00010143 wor5d"));
  2026.         assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['o' : '0', '\U00010143' : 'o']) ==
  2027.                to!S("hell0 o w0rld"));
  2028.         assert(translate(to!S("hello world"), cast(dchar[dchar])null) == to!S("hello world"));
  2029.  
  2030.         foreach (T; TypeTuple!( char[], const( char)[], immutable( char)[],
  2031.                                wchar[], const(wchar)[], immutable(wchar)[],
  2032.                                dchar[], const(dchar)[], immutable(dchar)[]))
  2033.         {
  2034.             assert(translate(to!S("hello world"),
  2035.                              cast(dchar[dchar])['h' : 'q', 'l' : '5'],
  2036.                              to!T("r")) ==
  2037.                    to!S("qe55o wo5d"));
  2038.             assert(translate(to!S("hello world"),
  2039.                              cast(dchar[dchar])['h' : 'q', 'l' : '5'],
  2040.                              to!T("helo")) ==
  2041.                    to!S(" wrd"));
  2042.             assert(translate(to!S("hello world"),
  2043.                              cast(dchar[dchar])['h' : 'q', 'l' : '5'],
  2044.                              to!T("q5")) ==
  2045.                    to!S("qe55o wor5d"));
  2046.             assert(translate(to!S("hello \U00010143 world"),
  2047.                              cast(dchar[dchar])['o' : '0', '\U00010143' : 'o'],
  2048.                              to!T("\U00010143 ")) ==
  2049.                    to!S("hell0w0rld"));
  2050.         }
  2051.  
  2052.         auto s = to!S("hello world");
  2053.         dchar[dchar] transTable = ['h' : 'q', 'l' : '5'];
  2054.         static assert(is(typeof(s) == typeof(translate(s, transTable))));
  2055.     }
  2056.     });
  2057. }
  2058.  
  2059. /++ Ditto +/
  2060. C1[] translate(C1, S, C2 = immutable char)(C1[] str,
  2061.                                            S[dchar] transTable,
  2062.                                            const(C2)[] toRemove = null) @safe pure
  2063.     if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2)
  2064. {
  2065.     auto buffer = appender!(C1[])();
  2066.     translateImpl(str, transTable, toRemove, buffer);
  2067.     return buffer.data;
  2068. }
  2069.  
  2070. /* @safe */ pure unittest
  2071. {
  2072.     assertCTFEable!(
  2073.     {
  2074.     foreach (S; TypeTuple!( char[], const( char)[], immutable( char)[],
  2075.                            wchar[], const(wchar)[], immutable(wchar)[],
  2076.                            dchar[], const(dchar)[], immutable(dchar)[]))
  2077.     {
  2078.         assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"]) ==
  2079.                to!S("yellowe4242o wor42d"));
  2080.         assert(translate(to!S("hello world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
  2081.                to!S("he\U00010143\U00010143\U00010143\U00010143owl wowlr\U00010143\U00010143d"));
  2082.         assert(translate(to!S("hello \U00010143 world"), ['h' : "yellow", 'l' : "42"]) ==
  2083.                to!S("yellowe4242o \U00010143 wor42d"));
  2084.         assert(translate(to!S("hello \U00010143 world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
  2085.                to!S("he\U00010143\U00010143\U00010143\U00010143owl \U00010143 wowlr\U00010143\U00010143d"));
  2086.         assert(translate(to!S("hello \U00010143 world"), ['h' : ""]) ==
  2087.                to!S("ello \U00010143 world"));
  2088.         assert(translate(to!S("hello \U00010143 world"), ['\U00010143' : ""]) ==
  2089.                to!S("hello  world"));
  2090.         assert(translate(to!S("hello world"), cast(string[dchar])null) == to!S("hello world"));
  2091.  
  2092.         foreach (T; TypeTuple!( char[], const( char)[], immutable( char)[],
  2093.                                wchar[], const(wchar)[], immutable(wchar)[],
  2094.                                dchar[], const(dchar)[], immutable(dchar)[]))
  2095.         {
  2096.             assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"], to!T("r")) ==
  2097.                    to!S("yellowe4242o wo42d"));
  2098.             assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"], to!T("helo")) ==
  2099.                    to!S(" wrd"));
  2100.             assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"], to!T("y42")) ==
  2101.                    to!S("yellowe4242o wor42d"));
  2102.             assert(translate(to!S("hello \U00010143 world"),
  2103.                              ['o' : "owl", '\U00010143' : "\n"],
  2104.                              to!T("\U00010143 ")) ==
  2105.                    to!S("hellowlwowlrld"));
  2106.             assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"], to!T("hello world")) ==
  2107.                    to!S(""));
  2108.             assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"], to!T("42")) ==
  2109.                    to!S("yellowe4242o wor42d"));
  2110.         }
  2111.  
  2112.         auto s = to!S("hello world");
  2113.         string[dchar] transTable = ['h' : "silly", 'l' : "putty"];
  2114.         static assert(is(typeof(s) == typeof(translate(s, transTable))));
  2115.     }
  2116.     });
  2117. }
  2118.  
  2119. /++
  2120.     This is an overload of $(D translate) which takes an existing buffer to write the contents to.
  2121.  
  2122.     Params:
  2123.         str        = The original string.
  2124.         transTable = The AA indicating which characters to replace and what to
  2125.                      replace them with.
  2126.         toRemove   = The characters to remove from the string.
  2127.         buffer     = An output range to write the contents to.
  2128.   +/
  2129. void translate(C1, C2 = immutable char, Buffer)(C1[] str,
  2130.                                         dchar[dchar] transTable,
  2131.                                         const(C2)[] toRemove,
  2132.                                         Buffer buffer)
  2133.     if (isSomeChar!C1 && isSomeChar!C2 && isOutputRange!(Buffer, C1))
  2134. {
  2135.     translateImpl(str, transTable, toRemove, buffer);
  2136. }
  2137.  
  2138. ///
  2139. @safe pure unittest
  2140. {
  2141.     dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q'];
  2142.     auto buffer = appender!(dchar[])();
  2143.     translate("hello world", transTable1, null, buffer);
  2144.     assert(buffer.data == "h5ll7 w7rld");
  2145.  
  2146.     buffer.clear();
  2147.     translate("hello world", transTable1, "low", buffer);
  2148.     assert(buffer.data == "h5 rd");
  2149.  
  2150.     buffer.clear();
  2151.     string[dchar] transTable2 = ['e' : "5", 'o' : "orange"];
  2152.     translate("hello world", transTable2, null, buffer);
  2153.     assert(buffer.data == "h5llorange worangerld");
  2154. }
  2155.  
  2156. /++ Ditto +/
  2157. void translate(C1, S, C2 = immutable char, Buffer)(C1[] str,
  2158.                                                    S[dchar] transTable,
  2159.                                                    const(C2)[] toRemove,
  2160.                                                    Buffer buffer)
  2161.     if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2 && isOutputRange!(Buffer, S))
  2162. {
  2163.     translateImpl(str, transTable, toRemove, buffer);
  2164. }
  2165.  
  2166. private void translateImpl(C1, T, C2, Buffer)(C1[] str,
  2167.                                       T transTable,
  2168.                                       const(C2)[] toRemove,
  2169.                                       Buffer buffer)
  2170. {
  2171.     bool[dchar] removeTable;
  2172.  
  2173.     foreach (dchar c; toRemove)
  2174.         removeTable[c] = true;
  2175.  
  2176.     foreach (dchar c; str)
  2177.     {
  2178.         if (c in removeTable)
  2179.             continue;
  2180.  
  2181.         auto newC = c in transTable;
  2182.  
  2183.         if (newC)
  2184.             put(buffer, *newC);
  2185.         else
  2186.             put(buffer, c);
  2187.     }
  2188. }
  2189.  
  2190. /++
  2191.     This is an $(I $(RED ASCII-only)) overload of $(LREF _translate). It
  2192.     will $(I not) work with Unicode. It exists as an optimization for the
  2193.     cases where Unicode processing is not necessary.
  2194.  
  2195.     Unlike the other overloads of $(LREF _translate), this one does not take
  2196.     an AA. Rather, it takes a $(D string) generated by $(LREF makeTrans).
  2197.  
  2198.     The array generated by $(D makeTrans) is $(D 256) elements long such that
  2199.     the index is equal to the ASCII character being replaced and the value is
  2200.     equal to the character that it's being replaced with. Note that translate
  2201.     does not decode any of the characters, so you can actually pass it Extended
  2202.     ASCII characters if you want to (ASCII only actually uses $(D 128)
  2203.     characters), but be warned that Extended ASCII characters are not valid
  2204.     Unicode and therefore will result in a $(D UTFException) being thrown from
  2205.     most other Phobos functions.
  2206.  
  2207.     Also, because no decoding occurs, it is possible to use this overload to
  2208.     translate ASCII characters within a proper UTF-8 string without altering the
  2209.     other, non-ASCII characters. It's replacing any code unit greater than
  2210.     $(D 127) with another code unit or replacing any code unit with another code
  2211.     unit greater than $(D 127) which will cause UTF validation issues.
  2212.  
  2213.     See_Also:
  2214.         $(LREF tr)
  2215.         $(XREF array, replace)
  2216.  
  2217.     Params:
  2218.         str        = The original string.
  2219.         transTable = The string indicating which characters to replace and what
  2220.                      to replace them with. It is generated by $(LREF makeTrans).
  2221.         toRemove   = The characters to remove from the string.
  2222.   +/
  2223. C[] translate(C = immutable char)(in char[] str, in char[] transTable, in char[] toRemove = null) @trusted pure nothrow
  2224.     if (is(Unqual!C == char))
  2225. in
  2226. {
  2227.     assert(transTable.length == 256);
  2228. }
  2229. body
  2230. {
  2231.     bool[256] remTable = false;
  2232.  
  2233.     foreach (char c; toRemove)
  2234.         remTable[c] = true;
  2235.  
  2236.     size_t count = 0;
  2237.     foreach (char c; str)
  2238.     {
  2239.         if (!remTable[c])
  2240.             ++count;
  2241.     }
  2242.  
  2243.     auto buffer = new char[count];
  2244.     translateImplAscii(str, transTable, remTable, buffer, toRemove);
  2245.     return cast(C[])(buffer);
  2246. }
  2247.  
  2248.  
  2249. /++ Ditto +/
  2250. string makeTrans(in char[] from, in char[] to) @trusted pure nothrow
  2251. in
  2252. {
  2253.     assert(from.length == to.length);
  2254.     assert(from.length <= 256);
  2255.     foreach (char c; from)
  2256.         assert(std.ascii.isASCII(c));
  2257.     foreach (char c; to)
  2258.         assert(std.ascii.isASCII(c));
  2259. }
  2260. body
  2261. {
  2262.     char[] transTable = new char[256];
  2263.  
  2264.     foreach (i; 0 .. transTable.length)
  2265.         transTable[i] = cast(char)i;
  2266.     foreach (i; 0 .. from.length)
  2267.         transTable[from[i]] = to[i];
  2268.  
  2269.     return assumeUnique(transTable);
  2270. }
  2271.  
  2272. ///
  2273. @safe pure nothrow unittest
  2274. {
  2275.     auto transTable1 = makeTrans("eo5", "57q");
  2276.     assert(translate("hello world", transTable1) == "h5ll7 w7rld");
  2277.  
  2278.     assert(translate("hello world", transTable1, "low") == "h5 rd");
  2279. }
  2280.  
  2281. @safe pure unittest
  2282. {
  2283.     assertCTFEable!(
  2284.     {
  2285.     foreach (C; TypeTuple!(char, const char, immutable char))
  2286.     {
  2287.         assert(translate!C("hello world", makeTrans("hl", "q5")) == to!(C[])("qe55o wor5d"));
  2288.  
  2289.         auto s = to!(C[])("hello world");
  2290.         auto transTable = makeTrans("hl", "q5");
  2291.         static assert(is(typeof(s) == typeof(translate!C(s, transTable))));
  2292.     }
  2293.  
  2294.     foreach (S; TypeTuple!(char[], const(char)[], immutable(char)[]))
  2295.     {
  2296.         assert(translate(to!S("hello world"), makeTrans("hl", "q5")) == to!S("qe55o wor5d"));
  2297.         assert(translate(to!S("hello \U00010143 world"), makeTrans("hl", "q5")) ==
  2298.                to!S("qe55o \U00010143 wor5d"));
  2299.         assert(translate(to!S("hello world"), makeTrans("ol", "1o")), to!S("heool wlrdd"));
  2300.         assert(translate(to!S("hello world"), makeTrans("", "")) == to!S("hello world"));
  2301.         assert(translate(to!S("hello world"), makeTrans("12345", "67890")) == to!S("hello world"));
  2302.         assert(translate(to!S("hello \U00010143 world"), makeTrans("12345", "67890")) ==
  2303.                to!S("hello \U00010143 world"));
  2304.  
  2305.         foreach (T; TypeTuple!(char[], const(char)[], immutable(char)[]))
  2306.         {
  2307.             assert(translate(to!S("hello world"), makeTrans("hl", "q5"), to!T("r")) ==
  2308.                    to!S("qe55o wo5d"));
  2309.             assert(translate(to!S("hello \U00010143 world"), makeTrans("hl", "q5"), to!T("r")) ==
  2310.                    to!S("qe55o \U00010143 wo5d"));
  2311.             assert(translate(to!S("hello world"), makeTrans("hl", "q5"), to!T("helo")) ==
  2312.                    to!S(" wrd"));
  2313.             assert(translate(to!S("hello world"), makeTrans("hl", "q5"), to!T("q5")) ==
  2314.                    to!S("qe55o wor5d"));
  2315.         }
  2316.     }
  2317.     });
  2318. }
  2319.  
  2320. /++
  2321.     This is an $(I $(RED ASCII-only)) overload of $(D translate) which takes an existing buffer to write the contents to.
  2322.  
  2323.     Params:
  2324.         str        = The original string.
  2325.         transTable = The string indicating which characters to replace and what
  2326.                      to replace them with. It is generated by $(LREF makeTrans).
  2327.         toRemove   = The characters to remove from the string.
  2328.         buffer     = An output range to write the contents to.
  2329.   +/
  2330. void translate(C = immutable char, Buffer)(in char[] str, in char[] transTable, in char[] toRemove, Buffer buffer)
  2331.     if (is(Unqual!C == char) && isOutputRange!(Buffer, char))
  2332. in
  2333. {
  2334.     assert(transTable.length == 256);
  2335. }
  2336. body
  2337. {
  2338.     bool[256] remTable = false;
  2339.  
  2340.     foreach (char c; toRemove)
  2341.         remTable[c] = true;
  2342.  
  2343.     translateImplAscii(str, transTable, remTable, buffer, toRemove);
  2344. }
  2345.  
  2346. ///
  2347. @safe pure unittest
  2348. {
  2349.     auto buffer = appender!(char[])();
  2350.     auto transTable1 = makeTrans("eo5", "57q");
  2351.     translate("hello world", transTable1, null, buffer);
  2352.     assert(buffer.data == "h5ll7 w7rld");
  2353.  
  2354.     buffer.clear();
  2355.     translate("hello world", transTable1, "low", buffer);
  2356.     assert(buffer.data == "h5 rd");
  2357. }
  2358.  
  2359. private void translateImplAscii(C = immutable char, Buffer)(in char[] str, in char[] transTable, ref bool[256] remTable, Buffer buffer, in char[] toRemove = null)
  2360. {
  2361.     static if (isOutputRange!(Buffer, char))
  2362.     {
  2363.         foreach (char c; str)
  2364.         {
  2365.             if (!remTable[c])
  2366.                 put(buffer, transTable[c]);
  2367.         }
  2368.     }
  2369.     else
  2370.     {
  2371.         size_t i = 0;
  2372.         foreach (char c; str)
  2373.         {
  2374.             if (!remTable[c])
  2375.                 buffer[i++] = transTable[c];
  2376.         }
  2377.     }
  2378. }
  2379.  
  2380. /*****************************************************
  2381.  * Format arguments into a string.
  2382.  *
  2383.  * Params: fmt  = Format string. For detailed specification, see $(XREF format,formattedWrite).
  2384.  *         args = Variadic list of arguments to format into returned string.
  2385.  *
  2386.  *  $(RED format's current implementation has been replaced with $(LREF xformat)'s
  2387.  *        implementation. in November 2012.
  2388.  *        This is seamless for most code, but it makes it so that the only
  2389.  *        argument that can be a format string is the first one, so any
  2390.  *        code which used multiple format strings has broken. Please change
  2391.  *        your calls to format accordingly.
  2392.  *
  2393.  *        e.g.:
  2394.  *        ----
  2395.  *        format("key = %s", key, ", value = %s", value)
  2396.  *        ----
  2397.  *        needs to be rewritten as:
  2398.  *        ----
  2399.  *        format("key = %s, value = %s", key, value)
  2400.  *        ----
  2401.  *   )
  2402.  */
  2403. string format(Char, Args...)(in Char[] fmt, Args args)
  2404. {
  2405.     auto w = appender!string();
  2406.     auto n = formattedWrite(w, fmt, args);
  2407.     version (all)
  2408.     {
  2409.         // In the future, this check will be removed to increase consistency
  2410.         // with formattedWrite
  2411.         enforce(n == args.length, new FormatException(
  2412.             text("Orphan format arguments: args[", n, "..", args.length, "]")));
  2413.     }
  2414.     return w.data;
  2415. }
  2416.  
  2417. unittest
  2418. {
  2419.     debug(string) printf("std.string.format.unittest\n");
  2420.  
  2421.     assertCTFEable!(
  2422.     {
  2423. //  assert(format(null) == "");
  2424.     assert(format("foo") == "foo");
  2425.     assert(format("foo%%") == "foo%");
  2426.     assert(format("foo%s", 'C') == "fooC");
  2427.     assert(format("%s foo", "bar") == "bar foo");
  2428.     assert(format("%s foo %s", "bar", "abc") == "bar foo abc");
  2429.     assert(format("foo %d", -123) == "foo -123");
  2430.     assert(format("foo %d", 123) == "foo 123");
  2431.  
  2432.     assertThrown!FormatException(format("foo %s"));
  2433.     assertThrown!FormatException(format("foo %s", 123, 456));
  2434.  
  2435.     assert(format("hel%slo%s%s%s", "world", -138, 'c', true) ==
  2436.                   "helworldlo-138ctrue");
  2437.     });
  2438. }
  2439.  
  2440.  
  2441. /*****************************************************
  2442.  * Format arguments into buffer <i>buf</i> which must be large
  2443.  * enough to hold the result. Throws RangeError if it is not.
  2444.  * Returns: The slice of $(D buf) containing the formatted string.
  2445.  *
  2446.  *  $(RED sformat's current implementation has been replaced with $(LREF xsformat)'s
  2447.  *        implementation. in November 2012.
  2448.  *        This is seamless for most code, but it makes it so that the only
  2449.  *        argument that can be a format string is the first one, so any
  2450.  *        code which used multiple format strings has broken. Please change
  2451.  *        your calls to sformat accordingly.
  2452.  *
  2453.  *        e.g.:
  2454.  *        ----
  2455.  *        sformat(buf, "key = %s", key, ", value = %s", value)
  2456.  *        ----
  2457.  *        needs to be rewritten as:
  2458.  *        ----
  2459.  *        sformat(buf, "key = %s, value = %s", key, value)
  2460.  *        ----
  2461.  *   )
  2462.  */
  2463. char[] sformat(Char, Args...)(char[] buf, in Char[] fmt, Args args)
  2464. {
  2465.     size_t i;
  2466.  
  2467.     struct Sink
  2468.     {
  2469.         void put(dchar c)
  2470.         {
  2471.             char[4] enc;
  2472.             auto n = encode(enc, c);
  2473.  
  2474.             if (buf.length < i + n)
  2475.                 onRangeError("std.string.sformat", 0);
  2476.  
  2477.             buf[i .. i + n] = enc[0 .. n];
  2478.             i += n;
  2479.         }
  2480.         void put(const(char)[] s)
  2481.         {
  2482.             if (buf.length < i + s.length)
  2483.                 onRangeError("std.string.sformat", 0);
  2484.  
  2485.             buf[i .. i + s.length] = s[];
  2486.             i += s.length;
  2487.         }
  2488.         void put(const(wchar)[] s)
  2489.         {
  2490.             for (; !s.empty; s.popFront())
  2491.                 put(s.front);
  2492.         }
  2493.         void put(const(dchar)[] s)
  2494.         {
  2495.             for (; !s.empty; s.popFront())
  2496.                 put(s.front);
  2497.         }
  2498.     }
  2499.     auto n = formattedWrite(Sink(), fmt, args);
  2500.     version (all)
  2501.     {
  2502.         // In the future, this check will be removed to increase consistency
  2503.         // with formattedWrite
  2504.         enforce(n == args.length, new FormatException(
  2505.             text("Orphan format arguments: args[", n, "..", args.length, "]")));
  2506.     }
  2507.     return buf[0 .. i];
  2508. }
  2509.  
  2510. unittest
  2511. {
  2512.     debug(string) printf("std.string.sformat.unittest\n");
  2513.  
  2514.     assertCTFEable!(
  2515.     {
  2516.     char[10] buf;
  2517.  
  2518.     assert(sformat(buf[], "foo") == "foo");
  2519.     assert(sformat(buf[], "foo%%") == "foo%");
  2520.     assert(sformat(buf[], "foo%s", 'C') == "fooC");
  2521.     assert(sformat(buf[], "%s foo", "bar") == "bar foo");
  2522.     assertThrown!RangeError(sformat(buf[], "%s foo %s", "bar", "abc"));
  2523.     assert(sformat(buf[], "foo %d", -123) == "foo -123");
  2524.     assert(sformat(buf[], "foo %d", 123) == "foo 123");
  2525.  
  2526.     assertThrown!FormatException(sformat(buf[], "foo %s"));
  2527.     assertThrown!FormatException(sformat(buf[], "foo %s", 123, 456));
  2528.  
  2529.     assert(sformat(buf[], "%s %s %s", "c"c, "w"w, "d"d) == "c w d");
  2530.     });
  2531. }
  2532.  
  2533. // Explicitly undocumented. It will be removed in July 2014.
  2534. deprecated("Please use std.string.format instead.") alias xformat = format;
  2535.  
  2536. deprecated unittest
  2537. {
  2538.     debug(string) printf("std.string.xformat.unittest\n");
  2539.  
  2540.     assertCTFEable!(
  2541.     {
  2542. //  assert(xformat(null) == "");
  2543.     assert(xformat("foo") == "foo");
  2544.     assert(xformat("foo%%") == "foo%");
  2545.     assert(xformat("foo%s", 'C') == "fooC");
  2546.     assert(xformat("%s foo", "bar") == "bar foo");
  2547.     assert(xformat("%s foo %s", "bar", "abc") == "bar foo abc");
  2548.     assert(xformat("foo %d", -123) == "foo -123");
  2549.     assert(xformat("foo %d", 123) == "foo 123");
  2550.  
  2551.     assertThrown!FormatException(xformat("foo %s"));
  2552.     assertThrown!FormatException(xformat("foo %s", 123, 456));
  2553.     });
  2554. }
  2555.  
  2556. // Explicitly undocumented. It will be removed in July 2014.
  2557. deprecated("Please use std.string.sformat instead.") alias xsformat = sformat;
  2558.  
  2559. deprecated unittest
  2560. {
  2561.     debug(string) printf("std.string.xsformat.unittest\n");
  2562.  
  2563.     assertCTFEable!(
  2564.     {
  2565.     char[10] buf;
  2566.  
  2567.     assert(xsformat(buf[], "foo") == "foo");
  2568.     assert(xsformat(buf[], "foo%%") == "foo%");
  2569.     assert(xsformat(buf[], "foo%s", 'C') == "fooC");
  2570.     assert(xsformat(buf[], "%s foo", "bar") == "bar foo");
  2571.     assertThrown!RangeError(xsformat(buf[], "%s foo %s", "bar", "abc"));
  2572.     assert(xsformat(buf[], "foo %d", -123) == "foo -123");
  2573.     assert(xsformat(buf[], "foo %d", 123) == "foo 123");
  2574.  
  2575.     assertThrown!FormatException(xsformat(buf[], "foo %s"));
  2576.     assertThrown!FormatException(xsformat(buf[], "foo %s", 123, 456));
  2577.  
  2578.     assert(xsformat(buf[], "%s %s %s", "c"c, "w"w, "d"d) == "c w d");
  2579.     });
  2580. }
  2581.  
  2582.  
  2583. /***********************************************
  2584.  * See if character c is in the pattern.
  2585.  * Patterns:
  2586.  *
  2587.  *  A <i>pattern</i> is an array of characters much like a <i>character
  2588.  *  class</i> in regular expressions. A sequence of characters
  2589.  *  can be given, such as "abcde". The '-' can represent a range
  2590.  *  of characters, as "a-e" represents the same pattern as "abcde".
  2591.  *  "a-fA-F0-9" represents all the hex characters.
  2592.  *  If the first character of a pattern is '^', then the pattern
  2593.  *  is negated, i.e. "^0-9" means any character except a digit.
  2594.  *  The functions inPattern, <b>countchars</b>, <b>removeschars</b>,
  2595.  *  and <b>squeeze</b>
  2596.  *  use patterns.
  2597.  *
  2598.  * Note: In the future, the pattern syntax may be improved
  2599.  *  to be more like regular expression character classes.
  2600.  */
  2601.  
  2602. bool inPattern(S)(dchar c, in S pattern) @safe pure if (isSomeString!S)
  2603. {
  2604.     bool result = false;
  2605.     int range = 0;
  2606.     dchar lastc;
  2607.  
  2608.     foreach (size_t i, dchar p; pattern)
  2609.     {
  2610.         if (p == '^' && i == 0)
  2611.         {
  2612.             result = true;
  2613.             if (i + 1 == pattern.length)
  2614.                 return (c == p);    // or should this be an error?
  2615.         }
  2616.         else if (range)
  2617.         {
  2618.             range = 0;
  2619.             if (lastc <= c && c <= p || c == p)
  2620.                 return !result;
  2621.         }
  2622.         else if (p == '-' && i > result && i + 1 < pattern.length)
  2623.         {
  2624.             range = 1;
  2625.             continue;
  2626.         }
  2627.         else if (c == p)
  2628.             return !result;
  2629.         lastc = p;
  2630.     }
  2631.     return result;
  2632. }
  2633.  
  2634.  
  2635. unittest
  2636. {
  2637.     debug(string) printf("std.string.inPattern.unittest\n");
  2638.  
  2639.     assertCTFEable!(
  2640.     {
  2641.     assert(inPattern('x', "x") == 1);
  2642.     assert(inPattern('x', "y") == 0);
  2643.     assert(inPattern('x', string.init) == 0);
  2644.     assert(inPattern('x', "^y") == 1);
  2645.     assert(inPattern('x', "yxxy") == 1);
  2646.     assert(inPattern('x', "^yxxy") == 0);
  2647.     assert(inPattern('x', "^abcd") == 1);
  2648.     assert(inPattern('^', "^^") == 0);
  2649.     assert(inPattern('^', "^") == 1);
  2650.     assert(inPattern('^', "a^") == 1);
  2651.     assert(inPattern('x', "a-z") == 1);
  2652.     assert(inPattern('x', "A-Z") == 0);
  2653.     assert(inPattern('x', "^a-z") == 0);
  2654.     assert(inPattern('x', "^A-Z") == 1);
  2655.     assert(inPattern('-', "a-") == 1);
  2656.     assert(inPattern('-', "^A-") == 0);
  2657.     assert(inPattern('a', "z-a") == 1);
  2658.     assert(inPattern('z', "z-a") == 1);
  2659.     assert(inPattern('x', "z-a") == 0);
  2660.     });
  2661. }
  2662.  
  2663.  
  2664. /***********************************************
  2665.  * See if character c is in the intersection of the patterns.
  2666.  */
  2667.  
  2668. bool inPattern(S)(dchar c, S[] patterns) @safe pure if (isSomeString!S)
  2669. {
  2670.     foreach (string pattern; patterns)
  2671.     {
  2672.         if (!inPattern(c, pattern))
  2673.         {
  2674.             return false;
  2675.         }
  2676.     }
  2677.     return true;
  2678. }
  2679.  
  2680.  
  2681. /********************************************
  2682.  * Count characters in s that match pattern.
  2683.  */
  2684.  
  2685. size_t countchars(S, S1)(S s, in S1 pattern) @safe pure if (isSomeString!S && isSomeString!S1)
  2686. {
  2687.     size_t count;
  2688.     foreach (dchar c; s)
  2689.     {
  2690.         count += inPattern(c, pattern);
  2691.     }
  2692.     return count;
  2693. }
  2694.  
  2695. unittest
  2696. {
  2697.     debug(string) printf("std.string.count.unittest\n");
  2698.  
  2699.     assertCTFEable!(
  2700.     {
  2701.     assert(countchars("abc", "a-c") == 3);
  2702.     assert(countchars("hello world", "or") == 3);
  2703.     });
  2704. }
  2705.  
  2706.  
  2707. /********************************************
  2708.  * Return string that is s with all characters removed that match pattern.
  2709.  */
  2710.  
  2711. S removechars(S)(S s, in S pattern) @safe pure if (isSomeString!S)
  2712. {
  2713.     Unqual!(typeof(s[0]))[] r;
  2714.     bool changed = false;
  2715.  
  2716.     foreach (size_t i, dchar c; s)
  2717.     {
  2718.         if (inPattern(c, pattern))
  2719.         {
  2720.             if (!changed)
  2721.             {
  2722.                 changed = true;
  2723.                 r = s[0 .. i].dup;
  2724.             }
  2725.             continue;
  2726.         }
  2727.         if (changed)
  2728.         {
  2729.             std.utf.encode(r, c);
  2730.         }
  2731.     }
  2732.     if (changed)
  2733.         return r;
  2734.     else
  2735.         return s;
  2736. }
  2737.  
  2738. unittest
  2739. {
  2740.     debug(string) printf("std.string.removechars.unittest\n");
  2741.  
  2742.     assertCTFEable!(
  2743.     {
  2744.     assert(removechars("abc", "a-c").length == 0);
  2745.     assert(removechars("hello world", "or") == "hell wld");
  2746.     assert(removechars("hello world", "d") == "hello worl");
  2747.     assert(removechars("hah", "h") == "a");
  2748.     });
  2749. }
  2750.  
  2751.  
  2752. /***************************************************
  2753.  * Return string where sequences of a character in s[] from pattern[]
  2754.  * are replaced with a single instance of that character.
  2755.  * If pattern is null, it defaults to all characters.
  2756.  */
  2757.  
  2758. S squeeze(S)(S s, in S pattern = null)
  2759. {
  2760.     Unqual!(typeof(s[0]))[] r;
  2761.     dchar lastc;
  2762.     size_t lasti;
  2763.     int run;
  2764.     bool changed;
  2765.  
  2766.     foreach (size_t i, dchar c; s)
  2767.     {
  2768.         if (run && lastc == c)
  2769.         {
  2770.             changed = true;
  2771.         }
  2772.         else if (pattern is null || inPattern(c, pattern))
  2773.         {
  2774.             run = 1;
  2775.             if (changed)
  2776.             {
  2777.                 if (r is null)
  2778.                     r = s[0 .. lasti].dup;
  2779.                 std.utf.encode(r, c);
  2780.             }
  2781.             else
  2782.                 lasti = i + std.utf.stride(s, i);
  2783.             lastc = c;
  2784.         }
  2785.         else
  2786.         {
  2787.             run = 0;
  2788.             if (changed)
  2789.             {
  2790.                 if (r is null)
  2791.                     r = s[0 .. lasti].dup;
  2792.                 std.utf.encode(r, c);
  2793.             }
  2794.         }
  2795.     }
  2796.     return changed ? ((r is null) ? s[0 .. lasti] : cast(S) r) : s;
  2797. }
  2798.  
  2799. unittest
  2800. {
  2801.     debug(string) printf("std.string.squeeze.unittest\n");
  2802.  
  2803.     assertCTFEable!(
  2804.     {
  2805.     string s;
  2806.  
  2807.     assert(squeeze("hello") == "helo");
  2808.  
  2809.     s = "abcd";
  2810.     assert(squeeze(s) is s);
  2811.     s = "xyzz";
  2812.     assert(squeeze(s).ptr == s.ptr); // should just be a slice
  2813.  
  2814.     assert(squeeze("hello goodbyee", "oe") == "hello godbye");
  2815.     });
  2816. }
  2817.  
  2818. /***************************************************************
  2819.  Finds the position $(D_PARAM pos) of the first character in $(D_PARAM
  2820.  s) that does not match $(D_PARAM pattern) (in the terminology used by
  2821.  $(LINK2 std_string.html,inPattern)). Updates $(D_PARAM s =
  2822.  s[pos..$]). Returns the slice from the beginning of the original
  2823.  (before update) string up to, and excluding, $(D_PARAM pos).
  2824.  
  2825.  Example:
  2826.  ---
  2827.  string s = "123abc";
  2828.  string t = munch(s, "0123456789");
  2829.  assert(t == "123" && s == "abc");
  2830.  t = munch(s, "0123456789");
  2831.  assert(t == "" && s == "abc");
  2832.  ---
  2833.  
  2834. The $(D_PARAM munch) function is mostly convenient for skipping
  2835. certain category of characters (e.g. whitespace) when parsing
  2836. strings. (In such cases, the return value is not used.)
  2837.  */
  2838.  
  2839. S1 munch(S1, S2)(ref S1 s, S2 pattern)
  2840. {
  2841.     size_t j = s.length;
  2842.     foreach (i, dchar c; s)
  2843.     {
  2844.         if (!inPattern(c, pattern))
  2845.         {
  2846.             j = i;
  2847.             break;
  2848.         }
  2849.     }
  2850.     scope(exit) s = s[j .. $];
  2851.     return s[0 .. j];
  2852. }
  2853.  
  2854. @safe pure unittest
  2855. {
  2856.     string s = "123€abc";
  2857.     string t = munch(s, "0123456789");
  2858.     assert(t == "123" && s == "€abc");
  2859.     t = munch(s, "0123456789");
  2860.     assert(t == "" && s == "€abc");
  2861.     t = munch(s, "£$€¥");
  2862.     assert(t == "€" && s == "abc");
  2863. }
  2864.  
  2865.  
  2866. /**********************************************
  2867.  * Return string that is the 'successor' to s[].
  2868.  * If the rightmost character is a-zA-Z0-9, it is incremented within
  2869.  * its case or digits. If it generates a carry, the process is
  2870.  * repeated with the one to its immediate left.
  2871.  */
  2872.  
  2873. S succ(S)(S s) @safe pure if (isSomeString!S)
  2874. {
  2875.     if (s.length && std.ascii.isAlphaNum(s[$ - 1]))
  2876.     {
  2877.         auto r = s.dup;
  2878.         size_t i = r.length - 1;
  2879.  
  2880.         while (1)
  2881.         {
  2882.             dchar c = s[i];
  2883.             dchar carry;
  2884.  
  2885.             switch (c)
  2886.             {
  2887.             case '9':
  2888.                 c = '0';
  2889.                 carry = '1';
  2890.                 goto Lcarry;
  2891.             case 'z':
  2892.             case 'Z':
  2893.                 c -= 'Z' - 'A';
  2894.                 carry = c;
  2895.             Lcarry:
  2896.                 r[i] = cast(char)c;
  2897.                 if (i == 0)
  2898.                 {
  2899.                     auto t = new typeof(r[0])[r.length + 1];
  2900.                     t[0] = cast(char) carry;
  2901.                     t[1 .. $] = r[];
  2902.                     return t;
  2903.                 }
  2904.                 i--;
  2905.                 break;
  2906.  
  2907.             default:
  2908.                 if (std.ascii.isAlphaNum(c))
  2909.                     r[i]++;
  2910.                 return r;
  2911.             }
  2912.         }
  2913.     }
  2914.     return s;
  2915. }
  2916.  
  2917. unittest
  2918. {
  2919.     debug(string) printf("std.string.succ.unittest\n");
  2920.  
  2921.     assertCTFEable!(
  2922.     {
  2923.     assert(succ(string.init) is null);
  2924.     assert(succ("!@#$%") == "!@#$%");
  2925.     assert(succ("1") == "2");
  2926.     assert(succ("9") == "10");
  2927.     assert(succ("999") == "1000");
  2928.     assert(succ("zz99") == "aaa00");
  2929.     });
  2930. }
  2931.  
  2932.  
  2933. /++
  2934.     Replaces the characters in $(D str) which are in $(D from) with the
  2935.     the corresponding characters in $(D to) and returns the resulting string.
  2936.  
  2937.     $(D tr) is based on
  2938.     $(WEB pubs.opengroup.org/onlinepubs/9699919799/utilities/_tr.html, Posix's tr),
  2939.     though it doesn't do everything that the Posix utility does.
  2940.  
  2941.     Params:
  2942.         str       = The original string.
  2943.         from      = The characters to replace.
  2944.         to        = The characters to replace with.
  2945.         modifiers = String containing modifiers.
  2946.  
  2947.     Modifiers:
  2948.         $(BOOKTABLE,
  2949.         $(TR $(TD Modifier) $(TD Description))
  2950.         $(TR $(TD $(D 'c')) $(TD Complement the list of characters in $(D from)))
  2951.         $(TR $(TD $(D 'd')) $(TD Removes matching characters with no corresponding
  2952.                               replacement in $(D to)))
  2953.         $(TR $(TD $(D 's')) $(TD Removes adjacent duplicates in the replaced
  2954.                               characters))
  2955.         )
  2956.  
  2957.     If the modifier $(D 'd') is present, then the number of characters in
  2958.     $(D to) may be only $(D 0) or $(D 1).
  2959.  
  2960.     If the modifier $(D 'd') is $(I not) present, and $(D to) is empty, then
  2961.     $(D to) is taken to be the same as $(D from).
  2962.  
  2963.     If the modifier $(D 'd') is $(I not) present, and $(D to) is shorter than
  2964.     $(D from), then $(D to) is extended by replicating the last character in
  2965.     $(D to).
  2966.  
  2967.     Both $(D from) and $(D to) may contain ranges using the $(D '-') character
  2968.     (e.g. $(D "a-d") is synonymous with $(D "abcd").) Neither accept a leading
  2969.     $(D '^') as meaning the complement of the string (use the $(D 'c') modifier
  2970.     for that).
  2971.   +/
  2972. C1[] tr(C1, C2, C3, C4 = immutable char)
  2973.        (C1[] str, const(C2)[] from, const(C3)[] to, const(C4)[] modifiers = null)
  2974. {
  2975.     bool mod_c;
  2976.     bool mod_d;
  2977.     bool mod_s;
  2978.  
  2979.     foreach (char c; modifiers)
  2980.     {
  2981.         switch (c)
  2982.         {
  2983.         case 'c':   mod_c = 1; break;   // complement
  2984.         case 'd':   mod_d = 1; break;   // delete unreplaced chars
  2985.         case 's':   mod_s = 1; break;   // squeeze duplicated replaced chars
  2986.         default:    assert(0);
  2987.         }
  2988.     }
  2989.  
  2990.     if (to.empty && !mod_d)
  2991.         to = std.conv.to!(typeof(to))(from);
  2992.  
  2993.     auto result = appender!(C1[])();
  2994.     bool modified;
  2995.     dchar lastc;
  2996.  
  2997.     foreach (dchar c; str)
  2998.     {
  2999.         dchar lastf;
  3000.         dchar lastt;
  3001.         dchar newc;
  3002.         int n = 0;
  3003.  
  3004.         for (size_t i = 0; i < from.length; )
  3005.         {
  3006.             dchar f = std.utf.decode(from, i);
  3007.             if (f == '-' && lastf != dchar.init && i < from.length)
  3008.             {
  3009.                 dchar nextf = std.utf.decode(from, i);
  3010.                 if (lastf <= c && c <= nextf)
  3011.                 {
  3012.                     n += c - lastf - 1;
  3013.                     if (mod_c)
  3014.                         goto Lnotfound;
  3015.                     goto Lfound;
  3016.                 }
  3017.                 n += nextf - lastf;
  3018.                 lastf = lastf.init;
  3019.                 continue;
  3020.             }
  3021.  
  3022.             if (c == f)
  3023.             {   if (mod_c)
  3024.                     goto Lnotfound;
  3025.                 goto Lfound;
  3026.             }
  3027.             lastf = f;
  3028.             n++;
  3029.         }
  3030.         if (!mod_c)
  3031.             goto Lnotfound;
  3032.         n = 0;          // consider it 'found' at position 0
  3033.  
  3034.       Lfound:
  3035.  
  3036.         // Find the nth character in to[]
  3037.         dchar nextt;
  3038.         for (size_t i = 0; i < to.length; )
  3039.         {   dchar t = std.utf.decode(to, i);
  3040.             if (t == '-' && lastt != dchar.init && i < to.length)
  3041.             {
  3042.                 nextt = std.utf.decode(to, i);
  3043.                 n -= nextt - lastt;
  3044.                 if (n < 0)
  3045.                 {
  3046.                     newc = nextt + n + 1;
  3047.                     goto Lnewc;
  3048.                 }
  3049.                 lastt = dchar.init;
  3050.                 continue;
  3051.             }
  3052.             if (n == 0)
  3053.             {   newc = t;
  3054.                 goto Lnewc;
  3055.             }
  3056.             lastt = t;
  3057.             nextt = t;
  3058.             n--;
  3059.         }
  3060.         if (mod_d)
  3061.             continue;
  3062.         newc = nextt;
  3063.  
  3064.       Lnewc:
  3065.         if (mod_s && modified && newc == lastc)
  3066.             continue;
  3067.         result.put(newc);
  3068.         assert(newc != dchar.init);
  3069.         modified = true;
  3070.         lastc = newc;
  3071.         continue;
  3072.  
  3073.       Lnotfound:
  3074.         result.put(c);
  3075.         lastc = c;
  3076.         modified = false;
  3077.     }
  3078.  
  3079.     return result.data;
  3080. }
  3081.  
  3082. unittest
  3083. {
  3084.     debug(string) printf("std.string.tr.unittest\n");
  3085.     import std.algorithm;
  3086.  
  3087.     // Complete list of test types; too slow to test'em all
  3088.     // alias TestTypes = TypeTuple!(
  3089.     //          char[], const( char)[], immutable( char)[],
  3090.     //         wchar[], const(wchar)[], immutable(wchar)[],
  3091.     //         dchar[], const(dchar)[], immutable(dchar)[]);
  3092.  
  3093.     // Reduced list of test types
  3094.     alias TestTypes = TypeTuple!(char[], const(wchar)[], immutable(dchar)[]);
  3095.  
  3096.     assertCTFEable!(
  3097.     {
  3098.     foreach (S; TestTypes)
  3099.     {
  3100.         foreach (T; TestTypes)
  3101.         {
  3102.             foreach (U; TestTypes)
  3103.             {
  3104.                 assert(equal(tr(to!S("abcdef"), to!T("cd"), to!U("CD")), "abCDef"));
  3105.                 assert(equal(tr(to!S("abcdef"), to!T("b-d"), to!U("B-D")), "aBCDef"));
  3106.                 assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-Dx")), "aBCDefgx"));
  3107.                 assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-CDx")), "aBCDefgx"));
  3108.                 assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-BCDx")), "aBCDefgx"));
  3109.                 assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U("*"), to!S("c")), "****ef"));
  3110.                 assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U(""), to!T("d")), "abcd"));
  3111.                 assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U(""), to!U("s")), "helo godbye"));
  3112.                 assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U("x"), "s"), "hex gxdbye"));
  3113.                 assert(equal(tr(to!S("14-Jul-87"), to!T("a-zA-Z"), to!U(" "), "cs"), " Jul "));
  3114.                 assert(equal(tr(to!S("Abc"), to!T("AAA"), to!U("XYZ")), "Xbc"));
  3115.             }
  3116.         }
  3117.  
  3118.         auto s = to!S("hello world");
  3119.         static assert(is(typeof(s) == typeof(tr(s, "he", "if"))));
  3120.     }
  3121.     });
  3122. }
  3123.  
  3124.  
  3125. /* ************************************************
  3126.  * Version       : v0.3
  3127.  * Author        : David L. 'SpottedTiger' Davis
  3128.  * Date Created  : 31.May.05 Compiled and Tested with dmd v0.125
  3129.  * Date Modified : 01.Jun.05 Modified the function to handle the
  3130.  *               :           imaginary and complex float-point
  3131.  *               :           datatypes.
  3132.  *               :
  3133.  * Licence       : Public Domain / Contributed to Digital Mars
  3134.  */
  3135.  
  3136. /**
  3137.  * [in] string s can be formatted in the following ways:
  3138.  *
  3139.  * Integer Whole Number:
  3140.  * (for byte, ubyte, short, ushort, int, uint, long, and ulong)
  3141.  * ['+'|'-']digit(s)[U|L|UL]
  3142.  *
  3143.  * examples: 123, 123UL, 123L, +123U, -123L
  3144.  *
  3145.  * Floating-Point Number:
  3146.  * (for float, double, real, ifloat, idouble, and ireal)
  3147.  * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
  3148.  *      or [nan|nani|inf|-inf]
  3149.  *
  3150.  * examples: +123., -123.01, 123.3e-10f, 123.3e-10fi, 123.3e-10L
  3151.  *
  3152.  * (for cfloat, cdouble, and creal)
  3153.  * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][+]
  3154.  *         [digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
  3155.  *      or [nan|nani|nan+nani|inf|-inf]
  3156.  *
  3157.  * examples: nan, -123e-1+456.9e-10Li, +123e+10+456i, 123+456
  3158.  *
 * [in] bool bAllowSep
 * False by default. When set to true, the separator characters
 * "," and "_" are accepted within the string. These characters
 * must still be stripped from the string before it is passed to
 * any of the conversion functions such as toInt() or toFloat(),
 * otherwise an error will occur.
  3165.  *
  3166.  * Also please note, that no spaces are allowed within the string
  3167.  * anywhere whether it's a leading, trailing, or embedded space(s),
  3168.  * thus they too must be stripped from the string before using this
  3169.  * function, or any of the conversion functions.
  3170.  */
  3171.  
  3172. bool isNumeric(const(char)[] s, in bool bAllowSep = false) @safe pure
  3173. {
  3174.     immutable iLen = s.length;
  3175.     if (iLen == 0)
  3176.         return false;
  3177.  
  3178.     // Check for NaN (Not a Number) and for Infinity
  3179.     if (s.among!((a, b) => icmp(a, b) == 0)
  3180.             ("nan", "nani", "nan+nani", "inf", "-inf"))
  3181.         return true;
  3182.  
  3183.     immutable j = s[0].among!('-', '+') != 0;
  3184.     bool bDecimalPoint, bExponent, bComplex, sawDigits;
  3185.  
  3186.     for (size_t i = j; i < iLen; i++)
  3187.     {
  3188.         immutable c = s[i];
  3189.  
  3190.         // Digits are good, continue checking
  3191.         // with the popFront character... ;)
  3192.         if (c >= '0' && c <= '9')
  3193.         {
  3194.             sawDigits = true;
  3195.             continue;
  3196.         }
  3197.  
  3198.         // Check for the complex type, and if found
  3199.         // reset the flags for checking the 2nd number.
  3200.         if (c == '+')
  3201.         {
  3202.             if (!i)
  3203.                 return false;
  3204.             bDecimalPoint = false;
  3205.             bExponent = false;
  3206.             bComplex = true;
  3207.             sawDigits = false;
  3208.             continue;
  3209.         }
  3210.  
  3211.         // Allow only one exponent per number
  3212.         if (c.among!('e', 'E'))
  3213.         {
  3214.             // A 2nd exponent found, return not a number
  3215.             if (bExponent || i + 1 >= iLen)
  3216.                 return false;
  3217.             // Look forward for the sign, and if
  3218.             // missing then this is not a number.
  3219.             if (!s[i + 1].among!('-', '+'))
  3220.                 return false;
  3221.             bExponent = true;
  3222.             i++;
  3223.             continue;
  3224.         }
  3225.         // Allow only one decimal point per number to be used
  3226.         if (c == '.' )
  3227.         {
  3228.             // A 2nd decimal point found, return not a number
  3229.             if (bDecimalPoint)
  3230.                 return false;
  3231.             bDecimalPoint = true;
  3232.             continue;
  3233.         }
  3234.         // Check for ending literal characters: "f,u,l,i,ul,fi,li",
  3235.         // and whether they're being used with the correct datatype.
  3236.         if (i == iLen - 2)
  3237.         {
  3238.             if (!sawDigits)
  3239.                 return false;
  3240.             // Integer Whole Number
  3241.             if (icmp(s[i..iLen], "ul") == 0 &&
  3242.                     (!bDecimalPoint && !bExponent && !bComplex))
  3243.                 return true;
  3244.             // Floating-Point Number
  3245.             if (s[i..iLen].among!((a, b) => icmp(a, b) == 0)("fi", "li") &&
  3246.                     (bDecimalPoint || bExponent || bComplex))
  3247.                 return true;
  3248.             if (icmp(s[i..iLen], "ul") == 0 &&
  3249.                     (bDecimalPoint || bExponent || bComplex))
  3250.                 return false;
  3251.             // Could be a Integer or a Float, thus
  3252.             // all these suffixes are valid for both
  3253.             return s[i..iLen].among!((a, b) => icmp(a, b) == 0)
  3254.                 ("ul", "fi", "li") != 0;
  3255.         }
  3256.         if (i == iLen - 1)
  3257.         {
  3258.             if (!sawDigits)
  3259.                 return false;
  3260.             // Integer Whole Number
  3261.             if (c.among!('u', 'l', 'U', 'L') &&
  3262.                    (!bDecimalPoint && !bExponent && !bComplex))
  3263.                 return true;
  3264.             // Check to see if the last character in the string
  3265.             // is the required 'i' character
  3266.             if (bComplex)
  3267.                 return c.among!('i', 'I') != 0;
  3268.             // Floating-Point Number
  3269.             return c.among!('l', 'L', 'f', 'F', 'i', 'I') != 0;
  3270.         }
  3271.         // Check if separators are allowed to be in the numeric string
  3272.         if (!bAllowSep || !c.among!('_', ','))
  3273.             return false;
  3274.     }
  3275.  
  3276.     return sawDigits;
  3277. }
  3278.  
  3279. unittest
  3280. {
  3281.     assert(!isNumeric("F"));
  3282.     assert(!isNumeric("L"));
  3283.     assert(!isNumeric("U"));
  3284.     assert(!isNumeric("i"));
  3285.     assert(!isNumeric("fi"));
  3286.     assert(!isNumeric("ul"));
  3287.     assert(!isNumeric("li"));
  3288.     assert(!isNumeric("."));
  3289.     assert(!isNumeric("-"));
  3290.     assert(!isNumeric("+"));
  3291.     assert(!isNumeric("e-"));
  3292.     assert(!isNumeric("e+"));
  3293.     assert(!isNumeric(".f"));
  3294.     assert(!isNumeric("e+f"));
  3295. }
  3296.  
  3297.  
  3298. unittest
  3299. {
  3300.     debug(string) printf("isNumeric(in string, bool = false).unittest\n");
  3301.  
  3302.     assertCTFEable!(
  3303.     {
  3304.     // Test the isNumeric(in string) function
  3305.     assert(isNumeric("1") == true );
  3306.     assert(isNumeric("1.0") == true );
  3307.     assert(isNumeric("1e-1") == true );
  3308.     assert(isNumeric("12345xxxx890") == false );
  3309.     assert(isNumeric("567L") == true );
  3310.     assert(isNumeric("23UL") == true );
  3311.     assert(isNumeric("-123..56f") == false );
  3312.     assert(isNumeric("12.3.5.6") == false );
  3313.     assert(isNumeric(" 12.356") == false );
  3314.     assert(isNumeric("123 5.6") == false );
  3315.     assert(isNumeric("1233E-1+1.0e-1i") == true );
  3316.  
  3317.     assert(isNumeric("123.00E-5+1234.45E-12Li") == true);
  3318.     assert(isNumeric("123.00e-5+1234.45E-12iL") == false);
  3319.     assert(isNumeric("123.00e-5+1234.45e-12uL") == false);
  3320.     assert(isNumeric("123.00E-5+1234.45e-12lu") == false);
  3321.  
  3322.     assert(isNumeric("123fi") == true);
  3323.     assert(isNumeric("123li") == true);
  3324.     assert(isNumeric("--123L") == false);
  3325.     assert(isNumeric("+123.5UL") == false);
  3326.     assert(isNumeric("123f") == true);
  3327.     assert(isNumeric("123.u") == false);
  3328.  
  3329.   // @@@BUG@@ to!string(float) is not CTFEable.
  3330.   // Related: formatValue(T) if (is(FloatingPointTypeOf!T))
  3331.   if (!__ctfe)
  3332.   {
  3333.     assert(isNumeric(to!string(real.nan)) == true);
  3334.     assert(isNumeric(to!string(-real.infinity)) == true);
  3335.     assert(isNumeric(to!string(123e+2+1234.78Li)) == true);
  3336.   }
  3337.  
  3338.     string s = "$250.99-";
  3339.     assert(isNumeric(s[1..s.length - 2]) == true);
  3340.     assert(isNumeric(s) == false);
  3341.     assert(isNumeric(s[0..s.length - 1]) == false);
  3342.     });
  3343.  
  3344.     assert(!isNumeric("-"));
  3345.     assert(!isNumeric("+"));
  3346. }
  3347.  
  3348.  
  3349. /*****************************
  3350.  * Soundex algorithm.
  3351.  *
  3352.  * The Soundex algorithm converts a word into 4 characters
  3353.  * based on how the word sounds phonetically. The idea is that
  3354.  * two spellings that sound alike will have the same Soundex
  3355.  * value, which means that Soundex can be used for fuzzy matching
  3356.  * of names.
  3357.  *
  3358.  * Params:
  3359.  *  string = String to convert to Soundex representation.
  3360.  *  buffer = Optional 4 char array to put the resulting Soundex
  3361.  *      characters into. If null, the return value
  3362.  *      buffer will be allocated on the heap.
  3363.  * Returns:
  3364.  *  The four character array with the Soundex result in it.
  3365.  *  Returns null if there is no Soundex representation for the string.
  3366.  *
  3367.  * See_Also:
  3368.  *  $(LINK2 http://en.wikipedia.org/wiki/Soundex, Wikipedia),
  3369.  *  $(LUCKY The Soundex Indexing System)
  3370.  *
  3371.  * Bugs:
  3372.  *  Only works well with English names.
  3373.  *  There are other arguably better Soundex algorithms,
  3374.  *  but this one is the standard one.
  3375.  */
  3376.  
  3377. char[] soundex(const(char)[] string, char[] buffer = null) @safe pure nothrow
  3378. in
  3379. {
  3380.     assert(!buffer.ptr || buffer.length >= 4);
  3381. }
  3382. out (result)
  3383. {
  3384.     if (result.ptr)
  3385.     {
  3386.         assert(result.length == 4);
  3387.         assert(result[0] >= 'A' && result[0] <= 'Z');
  3388.         foreach (char c; result[1 .. 4])
  3389.             assert(c >= '0' && c <= '6');
  3390.     }
  3391. }
  3392. body
  3393. {
  3394.     static immutable dex =
  3395.         // ABCDEFGHIJKLMNOPQRSTUVWXYZ
  3396.         "01230120022455012623010202";
  3397.  
  3398.     int b = 0;
  3399.     char lastc;
  3400.     foreach (char cs; string)
  3401.     {   auto c = cs;        // necessary because cs is final
  3402.  
  3403.         if (c >= 'a' && c <= 'z')
  3404.             c -= 'a' - 'A';
  3405.         else if (c >= 'A' && c <= 'Z')
  3406.         {
  3407.         }
  3408.         else
  3409.         {
  3410.             lastc = lastc.init;
  3411.             continue;
  3412.         }
  3413.         if (b == 0)
  3414.         {
  3415.             if (!buffer.ptr)
  3416.                 buffer = new char[4];
  3417.             buffer[0] = c;
  3418.             b++;
  3419.             lastc = dex[c - 'A'];
  3420.         }
  3421.         else
  3422.         {
  3423.             if (c == 'H' || c == 'W')
  3424.                 continue;
  3425.             if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U')
  3426.                 lastc = lastc.init;
  3427.             c = dex[c - 'A'];
  3428.             if (c != '0' && c != lastc)
  3429.             {
  3430.                 buffer[b] = c;
  3431.                 b++;
  3432.                 lastc = c;
  3433.             }
  3434.         }
  3435.         if (b == 4)
  3436.             goto Lret;
  3437.     }
  3438.     if (b == 0)
  3439.         buffer = null;
  3440.     else
  3441.         buffer[b .. 4] = '0';
  3442.   Lret:
  3443.     return buffer;
  3444. }
  3445.  
  3446. @safe pure nothrow unittest
  3447. {
  3448.     assertCTFEable!(
  3449.     {
  3450.     char[4] buffer;
  3451.  
  3452.     assert(soundex(null) == null);
  3453.     assert(soundex("") == null);
  3454.     assert(soundex("0123^&^^**&^") == null);
  3455.     assert(soundex("Euler") == "E460");
  3456.     assert(soundex(" Ellery ") == "E460");
  3457.     assert(soundex("Gauss") == "G200");
  3458.     assert(soundex("Ghosh") == "G200");
  3459.     assert(soundex("Hilbert") == "H416");
  3460.     assert(soundex("Heilbronn") == "H416");
  3461.     assert(soundex("Knuth") == "K530");
  3462.     assert(soundex("Kant", buffer) == "K530");
  3463.     assert(soundex("Lloyd") == "L300");
  3464.     assert(soundex("Ladd") == "L300");
  3465.     assert(soundex("Lukasiewicz", buffer) == "L222");
  3466.     assert(soundex("Lissajous") == "L222");
  3467.     assert(soundex("Robert") == "R163");
  3468.     assert(soundex("Rupert") == "R163");
  3469.     assert(soundex("Rubin") == "R150");
  3470.     assert(soundex("Washington") == "W252");
  3471.     assert(soundex("Lee") == "L000");
  3472.     assert(soundex("Gutierrez") == "G362");
  3473.     assert(soundex("Pfister") == "P236");
  3474.     assert(soundex("Jackson") == "J250");
  3475.     assert(soundex("Tymczak") == "T522");
  3476.     assert(soundex("Ashcraft") == "A261");
  3477.  
  3478.     assert(soundex("Woo") == "W000");
  3479.     assert(soundex("Pilgrim") == "P426");
  3480.     assert(soundex("Flingjingwaller") == "F452");
  3481.     assert(soundex("PEARSE") == "P620");
  3482.     assert(soundex("PIERCE") == "P620");
  3483.     assert(soundex("Price") == "P620");
  3484.     assert(soundex("CATHY") == "C300");
  3485.     assert(soundex("KATHY") == "K300");
  3486.     assert(soundex("Jones") == "J520");
  3487.     assert(soundex("johnsons") == "J525");
  3488.     assert(soundex("Hardin") == "H635");
  3489.     assert(soundex("Martinez") == "M635");
  3490.     });
  3491. }
  3492.  
  3493.  
  3494. /***************************************************
  3495.  * Construct an associative array consisting of all
  3496.  * abbreviations that uniquely map to the strings in values.
  3497.  *
  3498.  * This is useful in cases where the user is expected to type
  3499.  * in one of a known set of strings, and the program will helpfully
  3500.  * autocomplete the string once sufficient characters have been
  3501.  * entered that uniquely identify it.
  3502.  * Example:
  3503.  * ---
  3504.  * import std.stdio;
  3505.  * import std.string;
  3506.  *
  3507.  * void main()
  3508.  * {
  3509.  *    static string[] list = [ "food", "foxy" ];
  3510.  *
  3511.  *    auto abbrevs = std.string.abbrev(list);
  3512.  *
  3513.  *    foreach (key, value; abbrevs)
  3514.  *    {
  3515.  *       writefln("%s => %s", key, value);
  3516.  *    }
  3517.  * }
  3518.  * ---
  3519.  * produces the output:
  3520.  * <pre>
  3521.  * fox =&gt; foxy
  3522.  * food =&gt; food
  3523.  * foxy =&gt; foxy
  3524.  * foo =&gt; food
  3525.  * </pre>
  3526.  */
  3527.  
  3528. string[string] abbrev(string[] values) @safe pure
  3529. {
  3530.     string[string] result;
  3531.  
  3532.     // Make a copy when sorting so we follow COW principles.
  3533.     values = values.dup.sort;   // @@@BUG@@@ not CTFEable
  3534.  
  3535.     size_t values_length = values.length;
  3536.     size_t lasti = values_length;
  3537.     size_t nexti;
  3538.  
  3539.     string nv;
  3540.     string lv;
  3541.  
  3542.     for (size_t i = 0; i < values_length; i = nexti)
  3543.     {
  3544.         string value = values[i];
  3545.  
  3546.         // Skip dups
  3547.         for (nexti = i + 1; nexti < values_length; nexti++)
  3548.         {
  3549.             nv = values[nexti];
  3550.             if (value != values[nexti])
  3551.                 break;
  3552.         }
  3553.  
  3554.         for (size_t j = 0; j < value.length; j += std.utf.stride(value, j))
  3555.         {
  3556.             string v = value[0 .. j];
  3557.  
  3558.             if ((nexti == values_length || j > nv.length || v != nv[0 .. j]) &&
  3559.                 (lasti == values_length || j > lv.length || v != lv[0 .. j]))
  3560.             {
  3561.                 result[v] = value;
  3562.             }
  3563.         }
  3564.         result[value] = value;
  3565.         lasti = i;
  3566.         lv = value;
  3567.     }
  3568.  
  3569.     return result;
  3570. }
  3571.  
  3572. unittest
  3573. {
  3574.     debug(string) printf("string.abbrev.unittest\n");
  3575.  
  3576.     // @@@BUG@@@ Built-in arr.sort is not CTFEable
  3577.     //assertCTFEable!(
  3578.     //{
  3579.     string[] values;
  3580.     values ~= "hello";
  3581.     values ~= "hello";
  3582.     values ~= "he";
  3583.  
  3584.     string[string] r;
  3585.  
  3586.     r = abbrev(values);
  3587.     auto keys = r.keys.dup;
  3588.     keys.sort;
  3589.  
  3590.     assert(keys.length == 4);
  3591.     assert(keys[0] == "he");
  3592.     assert(keys[1] == "hel");
  3593.     assert(keys[2] == "hell");
  3594.     assert(keys[3] == "hello");
  3595.  
  3596.     assert(r[keys[0]] == "he");
  3597.     assert(r[keys[1]] == "hello");
  3598.     assert(r[keys[2]] == "hello");
  3599.     assert(r[keys[3]] == "hello");
  3600.     //});
  3601. }
  3602.  
  3603.  
  3604. /******************************************
  3605.  * Compute column number after string if string starts in the
  3606.  * leftmost column, which is numbered starting from 0.
  3607.  */
  3608.  
  3609. size_t column(S)(S str, size_t tabsize = 8) @safe pure if (isSomeString!S)
  3610. {
  3611.     size_t column;
  3612.  
  3613.     foreach (dchar c; str)
  3614.     {
  3615.         switch (c)
  3616.         {
  3617.         case '\t':
  3618.             column = (column + tabsize) / tabsize * tabsize;
  3619.             break;
  3620.  
  3621.         case '\r':
  3622.         case '\n':
  3623.         case paraSep:
  3624.         case lineSep:
  3625.             column = 0;
  3626.             break;
  3627.  
  3628.         default:
  3629.             column++;
  3630.             break;
  3631.         }
  3632.     }
  3633.     return column;
  3634. }
  3635.  
  3636. unittest
  3637. {
  3638.     debug(string) printf("string.column.unittest\n");
  3639.  
  3640.     assertCTFEable!(
  3641.     {
  3642.     assert(column(string.init) == 0);
  3643.     assert(column("") == 0);
  3644.     assert(column("\t") == 8);
  3645.     assert(column("abc\t") == 8);
  3646.     assert(column("12345678\t") == 16);
  3647.     });
  3648. }
  3649.  
  3650. /******************************************
  3651.  * Wrap text into a paragraph.
  3652.  *
  3653.  * The input text string s is formed into a paragraph
  3654.  * by breaking it up into a sequence of lines, delineated
  3655.  * by \n, such that the number of columns is not exceeded
  3656.  * on each line.
  3657.  * The last line is terminated with a \n.
  3658.  * Params:
  3659.  *  s = text string to be wrapped
  3660.  *  columns = maximum number of _columns in the paragraph
  3661.  *  firstindent = string used to _indent first line of the paragraph
  3662.  *  indent = string to use to _indent following lines of the paragraph
  3663.  *  tabsize = column spacing of tabs
  3664.  * Returns:
  3665.  *  The resulting paragraph.
  3666.  */
  3667.  
  3668. S wrap(S)(S s, size_t columns = 80, S firstindent = null,
  3669.         S indent = null, size_t tabsize = 8) @safe pure if (isSomeString!S)
  3670. {
  3671.     typeof(s.dup) result;
  3672.     int spaces;
  3673.     bool inword;
  3674.     bool first = true;
  3675.     size_t wordstart;
  3676.  
  3677.     result.length = firstindent.length + s.length;
  3678.     result.length = firstindent.length;
  3679.     result[] = firstindent[];
  3680.     auto col = column(result.idup, tabsize);
  3681.     foreach (size_t i, dchar c; s)
  3682.     {
  3683.         if (std.uni.isWhite(c))
  3684.         {
  3685.             if (inword)
  3686.             {
  3687.                 if (first)
  3688.                 {
  3689.                 }
  3690.                 else if (col + 1 + (i - wordstart) > columns)
  3691.                 {
  3692.                     result ~= '\n';
  3693.                     result ~= indent;
  3694.                     col = column(indent, tabsize);
  3695.                 }
  3696.                 else
  3697.                 {
  3698.                     result ~= ' ';
  3699.                     col += 1;
  3700.                 }
  3701.                 result ~= s[wordstart .. i];
  3702.                 col += i - wordstart;
  3703.                 inword = false;
  3704.                 first = false;
  3705.             }
  3706.         }
  3707.         else
  3708.         {
  3709.             if (!inword)
  3710.             {
  3711.                 wordstart = i;
  3712.                 inword = true;
  3713.             }
  3714.         }
  3715.     }
  3716.  
  3717.     if (inword)
  3718.     {
  3719.         if (col + 1 + (s.length - wordstart) >= columns)
  3720.         {
  3721.             result ~= '\n';
  3722.             result ~= indent;
  3723.         }
  3724.         else if (result.length != firstindent.length)
  3725.             result ~= ' ';
  3726.         result ~= s[wordstart .. s.length];
  3727.     }
  3728.     result ~= '\n';
  3729.  
  3730.     return result;
  3731. }
  3732.  
  3733. unittest
  3734. {
  3735.     debug(string) printf("string.wrap.unittest\n");
  3736.  
  3737.     assertCTFEable!(
  3738.     {
  3739.     assert(wrap(string.init) == "\n");
  3740.     assert(wrap(" a b   df ") == "a b df\n");
  3741.     assert(wrap(" a b   df ", 3) == "a b\ndf\n");
  3742.     assert(wrap(" a bc   df ", 3) == "a\nbc\ndf\n");
  3743.     assert(wrap(" abcd   df ", 3) == "abcd\ndf\n");
  3744.     assert(wrap("x") == "x\n");
  3745.     assert(wrap("u u") == "u u\n");
  3746.     });
  3747. }
  3748.  
  3749. /******************************************
  3750.  * Removes indentation from a multi-line string or an array of single-line strings.
  3751.  *
  3752.  * This uniformly outdents the text as much as possible.
  3753.  * Whitespace-only lines are always converted to blank lines.
  3754.  *
  3755.  * A StringException will be thrown if inconsistent indentation prevents
  3756.  * the input from being outdented.
  3757.  *
  3758.  * Works at compile-time.
  3759.  *
  3760.  * Example:
  3761.  * ---
  3762.  * writeln(q{
  3763.  *     import std.stdio;
  3764.  *     void main() {
  3765.  *         writeln("Hello");
  3766.  *     }
  3767.  * }.outdent());
  3768.  * ---
  3769.  *
  3770.  * Output:
  3771.  * ---
  3772.  *
  3773.  * import std.stdio;
  3774.  * void main() {
  3775.  *     writeln("Hello");
  3776.  * }
  3777.  *
  3778.  * ---
  3779.  *
  3780.  */
  3781.  
  3782. S outdent(S)(S str) @safe pure if(isSomeString!S)
  3783. {
  3784.     return str.splitLines(KeepTerminator.yes).outdent().join();
  3785. }
  3786.  
  3787. /// ditto
  3788. S[] outdent(S)(S[] lines) @safe pure if(isSomeString!S)
  3789. {
  3790.     if (lines.empty)
  3791.     {
  3792.         return null;
  3793.     }
  3794.  
  3795.     static S leadingWhiteOf(S str)
  3796.     {
  3797.         return str[ 0 .. $-find!(not!(std.uni.isWhite))(str).length ];
  3798.     }
  3799.  
  3800.     S shortestIndent;
  3801.     foreach (i, line; lines)
  3802.     {
  3803.         auto stripped = __ctfe? line.ctfe_strip() : line.strip();
  3804.  
  3805.         if (stripped.empty)
  3806.         {
  3807.             lines[i] = line[line.chomp().length..$];
  3808.         }
  3809.         else
  3810.         {
  3811.             auto indent = leadingWhiteOf(line);
  3812.  
  3813.             // Comparing number of code units instead of code points is OK here
  3814.             // because this function throws upon inconsistent indentation.
  3815.             if (shortestIndent is null || indent.length < shortestIndent.length)
  3816.             {
  3817.                 if (indent.empty)
  3818.                     return lines;
  3819.                 shortestIndent = indent;
  3820.             }
  3821.         }
  3822.     }
  3823.  
  3824.     foreach (i; 0..lines.length)
  3825.     {
  3826.         auto stripped = __ctfe? lines[i].ctfe_strip() : lines[i].strip();
  3827.  
  3828.         if (stripped.empty)
  3829.         {
  3830.             // Do nothing
  3831.         }
  3832.         else if (lines[i].startsWith(shortestIndent))
  3833.         {
  3834.             lines[i] = lines[i][shortestIndent.length..$];
  3835.         }
  3836.         else
  3837.         {
  3838.             if (__ctfe)
  3839.                 assert(false, "outdent: Inconsistent indentation");
  3840.             else
  3841.                 throw new StringException("outdent: Inconsistent indentation");
  3842.         }
  3843.     }
  3844.  
  3845.     return lines;
  3846. }
  3847.  
  3848. // TODO: Remove this and use std.string.strip when retro() becomes ctfe-able.
  3849. private S ctfe_strip(S)(S str) if(isSomeString!(Unqual!S))
  3850. {
  3851.     return str.stripLeft().ctfe_stripRight();
  3852. }
  3853.  
  3854. // TODO: Remove this and use std.string.strip when retro() becomes ctfe-able.
  3855. private S ctfe_stripRight(S)(S str) if(isSomeString!(Unqual!S))
  3856. {
  3857.     size_t endIndex = 0;
  3858.     size_t prevIndex = str.length;
  3859.  
  3860.     foreach_reverse (i, dchar ch; str)
  3861.     {
  3862.         if (!std.uni.isWhite(ch))
  3863.         {
  3864.             endIndex = prevIndex;
  3865.             break;
  3866.         }
  3867.         prevIndex = i;
  3868.     }
  3869.  
  3870.     return str[0..endIndex];
  3871. }
  3872.  
  3873. unittest
  3874. {
  3875.     debug(string) printf("string.outdent.unittest\n");
  3876.  
  3877.     template outdent_testStr(S)
  3878.     {
  3879.         enum S outdent_testStr =
  3880. "
  3881. \t\tX
  3882. \t\U00010143X
  3883. \t\t
  3884.  
  3885. \t\t\tX
  3886. \t ";
  3887.     }
  3888.  
  3889.     template outdent_expected(S)
  3890.     {
  3891.         enum S outdent_expected =
  3892. "
  3893. \tX
  3894. \U00010143X
  3895.  
  3896.  
  3897. \t\tX
  3898. ";
  3899.     }
  3900.  
  3901.     assertCTFEable!(
  3902.     {
  3903.     static assert(ctfe_strip(" \tHi \r\n") == "Hi");
  3904.     static assert(ctfe_strip(" \tHi&copy;\u2028 \r\n") == "Hi&copy;");
  3905.     static assert(ctfe_strip("Hi")         == "Hi");
  3906.     static assert(ctfe_strip(" \t \r\n")   == "");
  3907.     static assert(ctfe_strip("")           == "");
  3908.  
  3909.     foreach (S; TypeTuple!(string, wstring, dstring))
  3910.     {
  3911.         enum S blank = "";
  3912.         assert(blank.outdent() == blank);
  3913.         static assert(blank.outdent() == blank);
  3914.  
  3915.         enum S testStr1  = " \n \t\n ";
  3916.         enum S expected1 = "\n\n";
  3917.         assert(testStr1.outdent() == expected1);
  3918.         static assert(testStr1.outdent() == expected1);
  3919.  
  3920.         assert(testStr1[0..$-1].outdent() == expected1);
  3921.         static assert(testStr1[0..$-1].outdent() == expected1);
  3922.  
  3923.         enum S testStr2  = "a\n \t\nb";
  3924.         assert(testStr2.outdent() == testStr2);
  3925.         static assert(testStr2.outdent() == testStr2);
  3926.  
  3927.         enum S testStr3 =
  3928. "
  3929. \t\tX
  3930. \t\U00010143X
  3931. \t\t
  3932.  
  3933. \t\t\tX
  3934. \t ";
  3935.  
  3936.         enum S expected3 =
  3937. "
  3938. \tX
  3939. \U00010143X
  3940.  
  3941.  
  3942. \t\tX
  3943. ";
  3944.         assert(testStr3.outdent() == expected3);
  3945.         static assert(testStr3.outdent() == expected3);
  3946.  
  3947.         enum testStr4 = "  X\r  X\n  X\r\n  X\u2028  X\u2029  X";
  3948.         enum expected4 = "X\rX\nX\r\nX\u2028X\u2029X";
  3949.         assert(testStr4.outdent() == expected4);
  3950.         static assert(testStr4.outdent() == expected4);
  3951.  
  3952.         enum testStr5  = testStr4[0..$-1];
  3953.         enum expected5 = expected4[0..$-1];
  3954.         assert(testStr5.outdent() == expected5);
  3955.         static assert(testStr5.outdent() == expected5);
  3956.  
  3957.         enum testStr6 = "  \r  \n  \r\n  \u2028  \u2029";
  3958.         enum expected6 = "\r\n\r\n\u2028\u2029";
  3959.         assert(testStr6.outdent() == expected6);
  3960.         static assert(testStr6.outdent() == expected6);
  3961.     }
  3962.     });
  3963. }
  3964. debug(string)void main(){}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement