Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Written in the D programming language.
- /**
- String handling functions. Objects of types $(D _string), $(D
- wstring), and $(D dstring) are value types and cannot be mutated
- element-by-element. For using mutation during building strings, use
- $(D char[]), $(D wchar[]), or $(D dchar[]). The $(D *_string) types
- are preferable because they don't exhibit undesired aliasing, thus
- making code more robust.
- Macros: WIKI = Phobos/StdString
- Copyright: Copyright Digital Mars 2007-.
- License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
- Authors: $(WEB digitalmars.com, Walter Bright),
- $(WEB erdani.org, Andrei Alexandrescu),
- and Jonathan M Davis
- Source: $(PHOBOSSRC std/_string.d)
- */
- module std.string;
- //debug=string; // uncomment to turn on debugging printf's
- debug(string) import core.stdc.stdio;
- import core.exception : RangeError, onRangeError;
- import core.vararg, core.stdc.stdlib, core.stdc.string,
- std.algorithm, std.ascii, std.conv, std.exception, std.format, std.functional,
- std.range, std.traits,
- std.typecons, std.typetuple, std.uni, std.utf;
- //Remove when repeat is finally removed. They're only here as part of the
- //deprecation of these functions in std.string.
- public import std.algorithm : startsWith, endsWith, cmp, count;
- public import std.array : join, split;
- version(Windows) extern (C) // C-linkage declarations, compiled only for Windows builds
- {
- size_t wcslen(in wchar *); // takes a wchar pointer, returns a size_t
- int wcscmp(in wchar *, in wchar *); // takes two wchar pointers, returns an int
- }
- version(unittest) import std.algorithm : filter;
- /* ************* Exceptions *************** */
- /++
- The exception type used by the functions of std.string to report errors.
- +/
- class StringException : Exception
- {
-     /++
-     Creates the exception; every argument is forwarded unchanged to the
-     $(D Exception) constructor.
-     Params:
-     msg = Description of what went wrong.
-     file = Source file in which the exception was raised (defaults to the
-     file of the call site).
-     line = Source line on which the exception was raised (defaults to the
-     line of the call site).
-     next = Preceding exception in the chain, or $(D null) when there is none.
-     +/
-     this(string msg, string file = __FILE__, size_t line = __LINE__,
-          Throwable next = null) @safe pure nothrow
-     {
-         super(msg, file, line, next);
-     }
- }
- /++
- Compares two ranges of characters lexicographically. The comparison is
- case insensitive. Use $(XREF algorithm, cmp) for a case sensitive
- comparison. For details see $(XREF uni, _icmp).
- $(BOOKTABLE,
- $(TR $(TD $(D < 0)) $(TD $(D s1 < s2) ))
- $(TR $(TD $(D = 0)) $(TD $(D s1 == s2)))
- $(TR $(TD $(D > 0)) $(TD $(D s1 > s2)))
- )
- The name is an alias of $(D std.uni.icmp); no separate implementation
- lives in this module.
- +/
- alias icmp = std.uni.icmp;
- unittest // icmp: equal/unequal inputs across char widths, mutable casts, null/empty strings and filter-wrapped ranges
- {
- debug(string) printf("string.icmp.unittest\n");
- assertCTFEable!( // the whole body is handed to assertCTFEable as a delegate literal
- {
- assert(icmp("Ü", "ü") == 0, "Über failure");
- assert(icmp("abc", "abc") == 0);
- assert(icmp("ABC", "abc") == 0);
- assert(icmp("abc"w, "abc") == 0);
- assert(icmp("ABC", "abc"w) == 0);
- assert(icmp("abc"d, "abc") == 0);
- assert(icmp("ABC", "abc"d) == 0);
- assert(icmp(cast(char[])"abc", "abc") == 0);
- assert(icmp("ABC", cast(char[])"abc") == 0);
- assert(icmp(cast(wchar[])"abc"w, "abc") == 0);
- assert(icmp("ABC", cast(wchar[])"abc"w) == 0);
- assert(icmp(cast(dchar[])"abc"d, "abc") == 0);
- assert(icmp("ABC", cast(dchar[])"abc"d) == 0);
- assert(icmp(cast(string)null, cast(string)null) == 0);
- assert(icmp("", "") == 0);
- assert(icmp("abc", "abcd") < 0);
- assert(icmp("abcd", "abc") > 0);
- assert(icmp("abc", "abd") < 0);
- assert(icmp("bbc", "abc") > 0);
- assert(icmp("abc", "abc"w) == 0);
- assert(icmp("ABC"w, "abc") == 0);
- assert(icmp("", ""w) == 0);
- assert(icmp("abc"w, "abcd") < 0);
- assert(icmp("abcd", "abc"w) > 0);
- assert(icmp("abc", "abd") < 0);
- assert(icmp("bbc"w, "abc") > 0);
- assert(icmp("aaa", "aaaa"d) < 0);
- assert(icmp("aaaa"w, "aaa"d) > 0);
- assert(icmp("aaa"d, "aaa"w) == 0);
- assert(icmp("\u0430\u0411\u0543"d, "\u0430\u0411\u0543") == 0);
- assert(icmp("\u0430\u0411\u0543"d, "\u0431\u0410\u0544") < 0);
- assert(icmp("\u0431\u0411\u0544"d, "\u0431\u0410\u0543") > 0);
- assert(icmp("\u0430\u0410\u0543"d, "\u0430\u0410\u0544") < 0);
- assert(icmp("\u0430\u0411\u0543"d, "\u0430\u0411\u0543\u0237") < 0);
- assert(icmp("\u0430\u0411\u0543\u0237"d, "\u0430\u0411\u0543") > 0);
- assert(icmp("aaa", filter!"true"("aaa")) == 0);
- assert(icmp(filter!"true"("aaa"), "aaa") == 0);
- assert(icmp(filter!"true"("aaa"), filter!"true"("aaa")) == 0);
- assert(icmp(filter!"true"("\u0430\u0411\u0543"d), "\u0430\u0411\u0543") == 0);
- assert(icmp(filter!"true"("\u0430\u0411\u0543"d), "\u0431\u0410\u0544"w) < 0);
- assert(icmp("\u0431\u0411\u0544"d, filter!"true"("\u0431\u0410\u0543"w)) > 0);
- assert(icmp("\u0430\u0410\u0543"d, filter!"true"("\u0430\u0410\u0544")) < 0);
- assert(icmp(filter!"true"("\u0430\u0411\u0543"d), filter!"true"("\u0430\u0411\u0543\u0237")) < 0);
- assert(icmp(filter!"true"("\u0430\u0411\u0543\u0237"d), filter!"true"("\u0430\u0411\u0543")) > 0);
- });
- }
- /++
- Produces a zero-terminated copy of $(D s) that can be handed to C
- functions. $(D s) must not contain embedded $(D '\0') characters, because
- C code treats the first $(D '\0') it meets as the end of the string. If
- $(D s.empty) is $(D true), the result consists of the terminating
- $(D '\0') only.
- $(RED Important Note:) When the C function keeps the $(D char*) around for
- any reason, keep a reference to it in your D code as well. Otherwise the
- memory may go away during a garbage collection cycle while the C code is
- still using it.
- +/
- immutable(char)* toStringz(const(char)[] s) pure nothrow
- in
- {
-     // Deliberately empty. Asserting that s has no embedded zero,
-     //     assert(memchr(s.ptr, 0, s.length) == null,
-     //            text(s.length, ": `", s, "'"));
-     // contradicts the unittests.
- }
- out (result)
- {
-     if (result)
-     {
-         // Zeros at the very end of s cannot be told apart from the
-         // terminator, so they are excluded from the expected C length.
-         size_t expected = s.length;
-         for (; expected > 0 && s[expected - 1] == 0; --expected) {}
-         assert(strlen(result) == expected);
-         assert(memcmp(result, s.ptr, expected) == 0);
-     }
- }
- body
- {
-     /+ A copy is always made. Peeking one element past the end of s[] and
-        returning s unchanged when a 0 is already there
-            char* p = &s[0] + s.length;
-            if (*p == 0)
-                return s;
-        was judged unreliable by the original author; it could only work if
-        string literals were put in read-only memory and s[] were tested
-        for pointing into that memory. +/
-     auto buffer = new char[](s.length + 1);
-     buffer[0 .. s.length] = s[];
-     buffer[s.length] = '\0';
-     return assumeUnique(buffer).ptr;
- }
- /++ Ditto +/
- immutable(char)* toStringz(string s) pure nothrow
- {
-     // An empty string is represented by the empty literal's pointer.
-     if (s.empty)
-         return "".ptr;
-     /* If the element just past the end of s[] is already 0, s can be
-      * returned without copying. The original author notes that the
-      * compiler puts a 0 past the end of static strings and that the
-      * storage allocator puts a 0 past the end of newly allocated char[]'s.
-      */
-     immutable onePastEnd = s.ptr + s.length;
-     // Reading *onePastEnd is only attempted when its address is not a
-     // multiple of 4: an address that is a multiple of 4 is conservatively
-     // assumed to possibly start a new, unreadable block of memory, while
-     // any other address is taken to lie in valid memory.
-     immutable bool mayPeek = (cast(size_t) onePastEnd & 3) != 0;
-     if (mayPeek && *onePastEnd == 0)
-         return s.ptr;
-     // Otherwise fall back to the copying overload.
-     return toStringz(cast(const char[]) s);
- }
- unittest // toStringz: a literal, a slice of a const(char)[], an empty string and strings ending in '\0'
- {
- debug(string) printf("string.toStringz.unittest\n");
- // TODO: CTFEable toStringz is really necessary?
- //assertCTFEable!(
- //{
- auto p = toStringz("foo");
- assert(strlen(p) == 3);
- const(char)[] foo = "abbzxyzzy";
- p = toStringz(foo[3..5]); // slice "zx" is not followed by a 0 in foo
- assert(strlen(p) == 2);
- string test = "";
- p = toStringz(test);
- assert(*p == 0);
- test = "\0";
- p = toStringz(test);
- assert(*p == 0);
- test = "foo\0";
- p = toStringz(test);
- assert(p[0] == 'f' && p[1] == 'o' && p[2] == 'o' && p[3] == 0);
- //});
- }
- /++
- Converts a terminated string into a D slice without making a heap
- allocation: the returned slice refers to the very memory $(D str) points
- to and does not include the terminator.
- Params:
- str = Pointer to the first character; $(D null) yields a $(D null) slice.
- term = The terminating character. It defaults to the zero character;
- $(D T.init) is not usable for this because the default initializer of the
- D character types is not zero ($(D char.init) is $(D 0xFF)).
- Returns: The characters from $(D str) up to, but excluding, the first
- occurrence of $(D term).
- +/
- T[] fromStringz(T)(T* str, T term = cast(T) 0) pure nothrow
- {
-     if (str is null)
-         return null;
-     // StringzRange is a template, so it has to be instantiated explicitly.
-     return StringzRange!T(str, term).array;
- }
- /++
- A forward range over the characters of a terminated string. It never
- computes the total length while iterating, so it can be cheaper than
- $(D fromStringz) when only input/forward range access is needed.
- +/
- struct StringzRange(T)
- {
- public:
-     /// Pointer to the current front of the string.
-     T* ptr;
-     /// The terminating character (not part of the range).
-     immutable T back;
-     /++
-     Params:
-     pointer = Start of the string.
-     terminator = Terminating character. Defaults to the zero character;
-     $(D T.init) would be wrong here because $(D char.init) is $(D 0xFF).
-     +/
-     this(T* pointer, T terminator = cast(T) 0) pure nothrow
-     {
-         ptr = pointer;
-         back = terminator;
-     }
-     /// Input range primitive: true at the terminator (or for a null pointer).
-     @property bool empty() const pure nothrow
-     {
-         return ptr is null || *ptr == back;
-     }
-     /// Input range primitive: the current character, by reference.
-     @property ref inout(T) front() inout pure nothrow
-     {
-         return *ptr;
-     }
-     /// Input range primitive: advances by one character.
-     void popFront() pure nothrow
-     {
-         ++ptr;
-     }
-     /// Forward range primitive: an independent copy of the current position.
-     @property inout(StringzRange!T) save() inout pure nothrow
-     {
-         return this;
-     }
-     /++
-     Returns the remaining characters (terminator excluded) as a D slice
-     of the underlying memory. No heap allocation is made.
-     +/
-     @property T[] array() pure nothrow
-     {
-         if (ptr is null)
-             return null;
-         static if (is(Unqual!T == char))
-         {
-             // strlen only understands a zero terminator and cannot run in CTFE.
-             if (!__ctfe && back == 0)
-                 return ptr[0 .. strlen(ptr)];
-         }
-         size_t count = 0;
-         while (ptr[count] != back)
-             ++count;
-         return ptr[0 .. count];
-     }/*TODO:really needed implicit conversion to array?
-     ///Rest of range interface
-     alias array this;*/
- }
- unittest{}//TODO: empty placeholder; presumably meant to test fromStringz/StringzRange declared just above - confirm
- /**
- Tells a search function whether letter case is significant.
- */
- enum CaseSensitive
- {
-     no,  /// Upper- and lowercase forms of a letter compare equal.
-     yes, /// Characters must match exactly.
- }
- /++
- Returns the index of the first occurrence of $(D c) in $(D s). If $(D c)
- is not found, then $(D -1) is returned.
- For strings the index is a code unit index into $(D s); for any other
- range of characters it is the number of elements that precede the match.
- $(D cs) indicates whether the comparisons are case sensitive.
- +/
- ptrdiff_t indexOf(S)(S s, ElementType!S c, CaseSensitive cs = CaseSensitive.yes) @safe pure
-     if (isSomeString!S || (isInputRange!S && isSomeChar!(ElementType!S)))
- {
-     static if (isSomeString!S)
-     {
-         if (cs == CaseSensitive.yes)
-         {
-             // ElementType of a string is always dchar, so the width of the
-             // stored code units has to come from ElementEncodingType.
-             static if (ElementEncodingType!S.sizeof == 1)
-             {
-                 if (std.ascii.isASCII(c) && !__ctfe)
-                 { // Plain old ASCII: one code unit, let memchr do the scan
-                     ptrdiff_t trustedmemchr() @trusted
-                     {
-                         auto start = cast(const(char)*) s.ptr;
-                         auto hit = cast(const(char)*) memchr(start, c, s.length);
-                         return hit ? hit - start : -1;
-                     }
-                     return trustedmemchr();
-                 }
-             }
-             // c is a universal character: decode while iterating
-             foreach (ptrdiff_t i, dchar c2; s)
-             {
-                 if (c == c2)
-                     return i;
-             }
-         }
-         else if (std.ascii.isASCII(c))
-         { // Plain old ASCII: comparing raw code units is sufficient
-             immutable c1 = cast(char) std.ascii.toLower(c);
-             foreach (ptrdiff_t i, c2; s)
-             {
-                 if (c1 == std.ascii.toLower(c2))
-                     return i;
-             }
-         }
-         else
-         { // c is a universal character
-             immutable c1 = std.uni.toLower(c);
-             foreach (ptrdiff_t i, dchar c2; s)
-             {
-                 if (c1 == std.uni.toLower(c2))
-                     return i;
-             }
-         }
-     }
-     else
-     {
-         // Generic ranges offer no indexed foreach, so the position is
-         // counted by hand while the range is consumed.
-         immutable bool exact = cs == CaseSensitive.yes;
-         immutable dchar wanted = exact ? cast(dchar) c : std.uni.toLower(cast(dchar) c);
-         for (ptrdiff_t i = 0; !s.empty; s.popFront(), ++i)
-         {
-             immutable dchar current = s.front;
-             if ((exact ? current : std.uni.toLower(current)) == wanted)
-                 return i;
-         }
-     }
-     return -1;
- }
- unittest // indexOf(s, c[, cs]) for string/wstring/dstring, including non-ASCII characters; run through assertCTFEable
- {
- debug(string) printf("string.indexOf.unittest\n");
- assertCTFEable!(
- {
- foreach (S; TypeTuple!(string, wstring, dstring))
- {
- assert(indexOf(cast(S)null, cast(dchar)'a') == -1);
- assert(indexOf(to!S("def"), cast(dchar)'a') == -1);
- assert(indexOf(to!S("abba"), cast(dchar)'a') == 0);
- assert(indexOf(to!S("def"), cast(dchar)'f') == 2);
- assert(indexOf(to!S("def"), cast(dchar)'a', CaseSensitive.no) == -1);
- assert(indexOf(to!S("def"), cast(dchar)'a', CaseSensitive.no) == -1); // NOTE(review): exact duplicate of the previous assertion
- assert(indexOf(to!S("Abba"), cast(dchar)'a', CaseSensitive.no) == 0);
- assert(indexOf(to!S("def"), cast(dchar)'F', CaseSensitive.no) == 2);
- assert(indexOf(to!S("ödef"), 'ö', CaseSensitive.no) == 0);
- S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
- assert(indexOf("def", cast(char)'f', CaseSensitive.no) == 2);
- assert(indexOf(sPlts, cast(char)'P', CaseSensitive.no) == 23);
- assert(indexOf(sPlts, cast(char)'R', CaseSensitive.no) == 2);
- }
- foreach (cs; EnumMembers!CaseSensitive)
- {
- assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', cs) == 9); // expected values differ per width: indices are code units
- assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', cs) == 7);
- assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', cs) == 6);
- }
- });
- }
- /++
- Returns the index of the first occurrence of $(D c) in $(D s) with respect
- to the start index $(D startIdx). If $(D c) is not found, or $(D startIdx)
- lies at or beyond the end of $(D s), then $(D -1) is returned. If $(D c) is
- found the value of the returned index is at least $(D startIdx).
- For strings $(D startIdx) represents a codeunit index in $(D s); if the
- sequence starting at $(D startIdx) does not represent a well formed
- codepoint, then a $(XREF utf,UTFException) may be thrown. For other ranges
- $(D startIdx) is the number of leading elements to skip.
- $(D cs) indicates whether the comparisons are case sensitive.
- +/
- ptrdiff_t indexOf(S)(S s, ElementType!S c, const size_t startIdx,
-         CaseSensitive cs = CaseSensitive.yes) @safe pure
-     if (isSomeString!S || (isInputRange!S && isSomeChar!(ElementType!S)))
- {
-     static if (isSomeString!S)
-     {
-         if (startIdx >= s.length)
-             return -1;
-         // Slice by code units; popFrontN on a narrow string would skip
-         // whole code points and shift the reported index.
-         immutable ptrdiff_t foundIdx = indexOf(s[startIdx .. $], c, cs);
-     }
-     else
-     {
-         // popFrontN reports how many elements it could actually drop.
-         if (s.popFrontN(startIdx) != startIdx)
-             return -1;
-         immutable ptrdiff_t foundIdx = indexOf(s, c, cs);
-     }
-     if (foundIdx == -1)
-         return -1;
-     return foundIdx + cast(ptrdiff_t) startIdx;
- }
- unittest // indexOf(s, c, startIdx[, cs]) for string/wstring/dstring with several integer types used as start index
- {
- debug(string) printf("string.indexOf(startIdx).unittest\n");
- foreach (S; TypeTuple!(string, wstring, dstring))
- {
- assert(indexOf(cast(S)null, cast(dchar)'a', 1) == -1);
- assert(indexOf(to!S("def"), cast(dchar)'a', 1) == -1);
- assert(indexOf(to!S("abba"), cast(dchar)'a', 1) == 3);
- assert(indexOf(to!S("def"), cast(dchar)'f', 1) == 2);
- assert((to!S("def")).indexOf(cast(dchar)'a', 1,
- CaseSensitive.no) == -1);
- assert(indexOf(to!S("def"), cast(dchar)'a', 1,
- CaseSensitive.no) == -1);
- assert(indexOf(to!S("def"), cast(dchar)'a', 12, // start index beyond the end of the string
- CaseSensitive.no) == -1);
- assert(indexOf(to!S("AbbA"), cast(dchar)'a', 2,
- CaseSensitive.no) == 3);
- assert(indexOf(to!S("def"), cast(dchar)'F', 2, CaseSensitive.no) == 2);
- S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
- assert(indexOf("def", cast(char)'f', cast(uint)2,
- CaseSensitive.no) == 2);
- assert(indexOf(sPlts, cast(char)'P', 12, CaseSensitive.no) == 23);
- assert(indexOf(sPlts, cast(char)'R', cast(ulong)1,
- CaseSensitive.no) == 2);
- }
- foreach(cs; EnumMembers!CaseSensitive)
- {
- assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', 2, cs)
- == 9);
- assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', 3, cs)
- == 7);
- assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', 6, cs)
- == 6);
- }
- }
- /++
- Returns the index of the first occurrence of $(D sub) in $(D s). If $(D sub)
- is not found, then $(D -1) is returned.
- For strings the index is a code unit index into $(D s); for other ranges
- it is the number of elements that precede the match.
- $(D cs) indicates whether the comparisons are case sensitive.
- +/
- ptrdiff_t indexOf(S1, S2)(S1 s, S2 sub, CaseSensitive cs = CaseSensitive.yes)
-     if (is(Unqual!(ElementType!S1) == Unqual!(ElementType!S2))
-         && (isSomeString!S1 || (isSomeChar!(ElementType!S1)
-         && isForwardRange!S1 && isForwardRange!S2)))
- {
-     // find returns the part of s that starts with the match; searching a
-     // saved copy keeps s itself intact for the offset computation below.
-     S1 balance;
-     if (cs == CaseSensitive.yes)
-     {
-         balance = std.algorithm.find(s.save, sub);
-     }
-     else
-     {
-         balance = std.algorithm.find!
-             ((a, b) => std.uni.toLower(a) == std.uni.toLower(b))
-             (s.save, sub);
-     }
-     if (balance.empty)
-         return -1;
-     // balance is a tail of s, so the match starts where the two lengths
-     // differ. For strings .length counts code units.
-     static if (isSomeString!S1 || hasLength!S1)
-         return cast(ptrdiff_t)(s.length - balance.length);
-     else
-         return cast(ptrdiff_t)(walkLength(s.save) - walkLength(balance.save));
- }
- unittest // indexOf(s, sub[, cs]) for every pairing of string/wstring/dstring; run through assertCTFEable
- {
- debug(string) printf("string.indexOf.unittest\n");
- assertCTFEable!(
- {
- foreach (S; TypeTuple!(string, wstring, dstring))
- {
- foreach (T; TypeTuple!(string, wstring, dstring))
- {
- assert(indexOf(cast(S)null, to!T("a")) == -1);
- assert(indexOf(to!S("def"), to!T("a")) == -1);
- assert(indexOf(to!S("abba"), to!T("a")) == 0);
- assert(indexOf(to!S("def"), to!T("f")) == 2);
- assert(indexOf(to!S("dfefffg"), to!T("fff")) == 3); // first 'f' is at 1, first "fff" at 3
- assert(indexOf(to!S("dfeffgfff"), to!T("fff")) == 6);
- assert(indexOf(to!S("dfeffgfff"), to!T("a"), CaseSensitive.no) == -1);
- assert(indexOf(to!S("def"), to!T("a"), CaseSensitive.no) == -1);
- assert(indexOf(to!S("abba"), to!T("a"), CaseSensitive.no) == 0);
- assert(indexOf(to!S("def"), to!T("f"), CaseSensitive.no) == 2);
- assert(indexOf(to!S("dfefffg"), to!T("fff"), CaseSensitive.no) == 3);
- assert(indexOf(to!S("dfeffgfff"), to!T("fff"), CaseSensitive.no) == 6);
- S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
- S sMars = "Who\'s \'My Favorite Maritian?\'";
- assert(indexOf(sMars, to!T("MY fAVe"), CaseSensitive.no) == -1);
- assert(indexOf(sMars, to!T("mY fAVOriTe"), CaseSensitive.no) == 7);
- assert(indexOf(sPlts, to!T("mArS:"), CaseSensitive.no) == 0);
- assert(indexOf(sPlts, to!T("rOcK"), CaseSensitive.no) == 17);
- assert(indexOf(sPlts, to!T("Un."), CaseSensitive.no) == 41);
- assert(indexOf(sPlts, to!T(sPlts), CaseSensitive.no) == 0);
- assert(indexOf("\u0100", to!T("\u0100"), CaseSensitive.no) == 0);
- // Thanks to Carlos Santander B. and zwang
- assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
- to!T("page-break-before"), CaseSensitive.no) == -1);
- }
- foreach (cs; EnumMembers!CaseSensitive)
- {
- assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"), cs) == 9);
- assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"), cs) == 7);
- assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"), cs) == 6);
- }
- }
- });
- }
- /++
- Returns the index of the first occurrence of $(D sub) in $(D s) with
- respect to the start index $(D startIdx). If $(D sub) is not found, or
- $(D startIdx) lies at or beyond the end of $(D s), then $(D -1) is
- returned. If $(D sub) is found the value of the returned index is at least
- $(D startIdx).
- For strings $(D startIdx) represents a codeunit index in $(D s); if the
- sequence starting at $(D startIdx) does not represent a well formed
- codepoint, then a $(XREF utf,UTFException) may be thrown. For other ranges
- $(D startIdx) is the number of leading elements to skip.
- $(D cs) indicates whether the comparisons are case sensitive.
- +/
- ptrdiff_t indexOf(S1, S2)(S1 s, S2 sub,
-         const size_t startIdx, CaseSensitive cs = CaseSensitive.yes)
-     // Same constraint as the overload without startIdx. Requiring S2 to be
-     // a range keeps this overload from also matching indexOf(s, c, startIdx).
-     if (is(Unqual!(ElementType!S1) == Unqual!(ElementType!S2))
-         && (isSomeString!S1 || (isSomeChar!(ElementType!S1)
-         && isForwardRange!S1 && isForwardRange!S2)))
- {
-     static if (isSomeString!S1)
-     {
-         if (startIdx >= s.length)
-             return -1;
-         // Slice by code units; popFrontN on a narrow string would skip
-         // whole code points and shift the reported index.
-         immutable ptrdiff_t foundIdx = indexOf(s[startIdx .. $], sub, cs);
-     }
-     else
-     {
-         // popFrontN reports how many elements it could actually drop.
-         if (s.popFrontN(startIdx) != startIdx)
-             return -1;
-         immutable ptrdiff_t foundIdx = indexOf(s, sub, cs);
-     }
-     if (foundIdx == -1)
-         return -1;
-     return foundIdx + cast(ptrdiff_t) startIdx;
- }
- unittest // indexOf(s, sub, startIdx[, cs]) for every pairing of string/wstring/dstring
- {
- debug(string) printf("string.indexOf(startIdx).unittest\n");
- foreach(S; TypeTuple!(string, wstring, dstring))
- {
- foreach(T; TypeTuple!(string, wstring, dstring))
- {
- assert(indexOf(cast(S)null, to!T("a"), 1337) == -1);
- assert(indexOf(to!S("def"), to!T("a"), 0) == -1);
- assert(indexOf(to!S("abba"), to!T("a"), 2) == 3);
- assert(indexOf(to!S("def"), to!T("f"), 1) == 2);
- assert(indexOf(to!S("dfefffg"), to!T("fff"), 1) == 3);
- assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 5) == 6);
- assert(indexOf(to!S("dfeffgfff"), to!T("a"), 1, CaseSensitive.no) == -1);
- assert(indexOf(to!S("def"), to!T("a"), 2, CaseSensitive.no) == -1);
- assert(indexOf(to!S("abba"), to!T("a"), 3, CaseSensitive.no) == 3);
- assert(indexOf(to!S("def"), to!T("f"), 1, CaseSensitive.no) == 2);
- assert(indexOf(to!S("dfefffg"), to!T("fff"), 2, CaseSensitive.no) == 3);
- assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 4, CaseSensitive.no) == 6);
- assert(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, CaseSensitive.no) == 9, // on failure the message shows the actual result and both types
- to!string(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, CaseSensitive.no))
- ~ " " ~ S.stringof ~ " " ~ T.stringof);
- S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
- S sMars = "Who\'s \'My Favorite Maritian?\'";
- assert(indexOf(sMars, to!T("MY fAVe"), 10,
- CaseSensitive.no) == -1);
- assert(indexOf(sMars, to!T("mY fAVOriTe"), 4, CaseSensitive.no) == 7);
- assert(indexOf(sPlts, to!T("mArS:"), 0, CaseSensitive.no) == 0);
- assert(indexOf(sPlts, to!T("rOcK"), 12, CaseSensitive.no) == 17);
- assert(indexOf(sPlts, to!T("Un."), 32, CaseSensitive.no) == 41);
- assert(indexOf(sPlts, to!T(sPlts), 0, CaseSensitive.no) == 0);
- assert(indexOf("\u0100", to!T("\u0100"), 0, CaseSensitive.no) == 0);
- // Thanks to Carlos Santander B. and zwang
- assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
- to!T("page-break-before"), 10, CaseSensitive.no) == -1);
- // In order for indexOf with and without index to be consistent
- assert(indexOf(to!S(""), to!T("")) == indexOf(to!S(""), to!T(""), 0));
- }
- foreach(cs; EnumMembers!CaseSensitive)
- {
- assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"),
- 3, cs) == 9);
- assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"),
- 3, cs) == 7);
- assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"),
- 3, cs) == 6);
- }
- }
- }
- /++
- Returns the index of the last occurrence of $(D c) in $(D s). If $(D c)
- is not found, then $(D -1) is returned.
- For strings the index is a code unit index into $(D s); for other ranges
- it is the number of elements that precede the match.
- $(D cs) indicates whether the comparisons are case sensitive.
- +/
- ptrdiff_t lastIndexOf(S)(S s, ElementType!S c, CaseSensitive cs = CaseSensitive.yes) @safe pure
-     if (isSomeString!S || (isBidirectionalRange!S
-         && hasLength!S && isSomeChar!(ElementType!S)))
- {
-     static if (isSomeString!S)
-     {
-         // foreach_reverse yields the index of the first code unit of each
-         // element, so the index can be returned as is.
-         if (cs == CaseSensitive.yes)
-         {
-             if (std.ascii.isASCII(c))
-             { // Plain old ASCII: compare raw code units
-                 foreach_reverse (i, unit; s)
-                 {
-                     if (unit == c)
-                         return i;
-                 }
-             }
-             else
-             { // c is a universal character: decode while iterating
-                 foreach_reverse (i, dchar point; s)
-                 {
-                     if (point == c)
-                         return i;
-                 }
-             }
-         }
-         else if (std.ascii.isASCII(c))
-         {
-             immutable c1 = std.ascii.toLower(c);
-             foreach_reverse (i, unit; s)
-             {
-                 if (c1 == std.ascii.toLower(unit))
-                     return i;
-             }
-         }
-         else
-         {
-             immutable c1 = std.uni.toLower(c);
-             foreach_reverse (i, dchar point; s)
-             {
-                 if (c1 == std.uni.toLower(point))
-                     return i;
-             }
-         }
-     }
-     else
-     {
-         // Walk from the back; i is always the index of s.back.
-         immutable bool exact = cs == CaseSensitive.yes;
-         immutable dchar wanted = exact ? cast(dchar) c : std.uni.toLower(cast(dchar) c);
-         for (size_t i = s.length; !s.empty; s.popBack())
-         {
-             --i;
-             immutable dchar current = s.back;
-             if ((exact ? current : std.uni.toLower(current)) == wanted)
-                 return cast(ptrdiff_t) i;
-         }
-     }
-     return -1;
- }
- unittest // lastIndexOf(s, c[, cs]) for string/wstring/dstring, including non-ASCII characters; run through assertCTFEable
- {
- debug(string) printf("string.lastIndexOf.unittest\n");
- assertCTFEable!(
- {
- foreach (S; TypeTuple!(string, wstring, dstring))
- {
- assert(lastIndexOf(cast(S) null, 'a') == -1);
- assert(lastIndexOf(to!S("def"), 'a') == -1);
- assert(lastIndexOf(to!S("abba"), 'a') == 3);
- assert(lastIndexOf(to!S("def"), 'f') == 2);
- assert(lastIndexOf(to!S("ödef"), 'ö') == 0);
- assert(lastIndexOf(cast(S) null, 'a', CaseSensitive.no) == -1);
- assert(lastIndexOf(to!S("def"), 'a', CaseSensitive.no) == -1);
- assert(lastIndexOf(to!S("AbbA"), 'a', CaseSensitive.no) == 3);
- assert(lastIndexOf(to!S("def"), 'F', CaseSensitive.no) == 2);
- assert(lastIndexOf(to!S("ödef"), 'ö', CaseSensitive.no) == 0);
- assert(lastIndexOf(to!S("i\u0100def"), to!dchar("\u0100"),
- CaseSensitive.no) == 1);
- S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
- assert(lastIndexOf(to!S("def"), 'f', CaseSensitive.no) == 2);
- assert(lastIndexOf(sPlts, 'M', CaseSensitive.no) == 34);
- assert(lastIndexOf(sPlts, 'S', CaseSensitive.no) == 40);
- }
- foreach (cs; EnumMembers!CaseSensitive)
- {
- assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4); // expected values differ per width: indices are code units
- assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2);
- assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1);
- }
- });
- }
- /++
- Returns the index of the last occurrence of $(D c) in $(D s). If $(D c) is
- not found, or $(D startIdx) is greater than the length of $(D s), then
- $(D -1) is returned. The $(D startIdx) slices $(D s) in the following way
- $(D s[0 .. startIdx]). For strings $(D startIdx) represents a codeunit
- index in $(D s). If the sequence ending at $(D startIdx) does not
- represent a well formed codepoint, then a $(XREF utf,UTFException) may be
- thrown.
- $(D cs) indicates whether the comparisons are case sensitive.
- +/
- ptrdiff_t lastIndexOf(S)(S s, ElementType!S c, const size_t startIdx,
-         CaseSensitive cs = CaseSensitive.yes) @safe pure
-     if (isSomeString!S || (isBidirectionalRange!S && hasLength!S
-         && isSomeChar!(ElementType!S)))
- {
-     if (startIdx > s.length)
-         return -1;
-     static if (isSomeString!S)
-     {
-         return lastIndexOf(s[0 .. startIdx], c, cs);
-     }
-     else
-     {
-         // Keep only the first startIdx elements, then search those.
-         s.popBackExactly(s.length - startIdx);
-         return lastIndexOf(s, c, cs);
-     }
- }
- unittest // lastIndexOf(s, c[, startIdx][, cs]) for string/wstring/dstring
- {
- debug(string) printf("string.lastIndexOf.unittest\n");
- foreach(S; TypeTuple!(string, wstring, dstring))
- {
- assert(lastIndexOf(cast(S) null, 'a') == -1);
- assert(lastIndexOf(to!S("def"), 'a') == -1);
- assert(lastIndexOf(to!S("abba"), 'a', 3) == 0);
- assert(lastIndexOf(to!S("deff"), 'f', 3) == 2);
- assert(lastIndexOf(cast(S) null, 'a', CaseSensitive.no) == -1);
- assert(lastIndexOf(to!S("def"), 'a', CaseSensitive.no) == -1);
- assert(lastIndexOf(to!S("AbbAa"), 'a', to!ushort(4), CaseSensitive.no) == 3,
- to!string(lastIndexOf(to!S("AbbAa"), 'a', 4, CaseSensitive.no)));
- assert(lastIndexOf(to!S("def"), 'F', 3, CaseSensitive.no) == 2);
- S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
- assert(lastIndexOf(to!S("def"), 'f', 4, CaseSensitive.no) == -1); // start index 4 exceeds the length 3
- assert(lastIndexOf(sPlts, 'M', sPlts.length -2, CaseSensitive.no) == 34);
- assert(lastIndexOf(sPlts, 'S', sPlts.length -2, CaseSensitive.no) == 40);
- }
- foreach(cs; EnumMembers!CaseSensitive)
- {
- assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4);
- assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2);
- assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1);
- }
- }
- /++
- Returns the index of the last occurrence of $(D sub) in $(D s). If $(D sub)
- is not found, then $(D -1) is returned. An empty $(D sub) matches at the
- very end, so $(D s.length) is returned for it.
- For strings the index is a code unit index into $(D s); for other ranges
- it is the number of elements that precede the match.
- $(D cs) indicates whether the comparisons are case sensitive.
- +/
- ptrdiff_t lastIndexOf(S1, S2)(S1 s, S2 sub, CaseSensitive cs = CaseSensitive.yes) @safe pure
-     // Narrow strings do not satisfy hasLength, hence the isSomeString!S1
-     // alternative. Bidirectional ranges are forward ranges as well.
-     if (isBidirectionalRange!S1 && isBidirectionalRange!S2
-         && (isSomeString!S1 || hasLength!S1))
- {
-     // Number of elements (code points for strings) in the needle.
-     immutable size_t subCount = walkLength(sub.save);
-     // Shrink s from the back one element at a time; the first tail-match
-     // found this way is the last occurrence.
-     for (;;)
-     {
-         immutable bool matched = cs == CaseSensitive.yes
-             ? std.algorithm.endsWith(s.save, sub.save)
-             : std.algorithm.endsWith!
-                 ((a, b) => std.uni.toLower(a) == std.uni.toLower(b))
-                 (s.save, sub.save);
-         if (matched)
-         {
-             size_t start = s.length;
-             static if (isSomeString!S1)
-             {
-                 // Step back over the matched code points in s's own
-                 // encoding; sub may use another character width.
-                 foreach (_; 0 .. subCount)
-                     start -= std.utf.strideBack(s, start);
-             }
-             else
-             {
-                 start -= subCount;
-             }
-             return cast(ptrdiff_t) start;
-         }
-         if (s.empty)
-             return -1;
-         s.popBack();
-     }
- }
- unittest // lastIndexOf(s, sub[, cs]) for every pairing of string/wstring/dstring; run through assertCTFEable
- {
- debug(string) printf("string.lastIndexOf.unittest\n");
- assertCTFEable!(
- {
- foreach (S; TypeTuple!(string, wstring, dstring))
- {
- foreach (T; TypeTuple!(string, wstring, dstring))
- {
- enum typeStr = S.stringof ~ " " ~ T.stringof; // failure message naming the type pair under test
- assert(lastIndexOf(cast(S)null, to!T("a")) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("c")) == 6, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd")) == 6, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef")) == 8, typeStr);
- assert(lastIndexOf(to!S("abcdefCdef"), to!T("c")) == 2, typeStr);
- assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd")) == 2, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("x")) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy")) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("")) == 10, typeStr); // empty needle is expected at s.length
- assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö")) == 0, typeStr);
- assert(lastIndexOf(cast(S)null, to!T("a"), CaseSensitive.no) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), CaseSensitive.no) == 6, typeStr);
- assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), CaseSensitive.no) == 6, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"), CaseSensitive.no) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy"), CaseSensitive.no) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), CaseSensitive.no) == 10, typeStr);
- assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö"), CaseSensitive.no) == 0, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), CaseSensitive.no) == 6, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), CaseSensitive.no) == 6, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), CaseSensitive.no) == 7, typeStr);
- assert(lastIndexOf(to!S("ödfeffgfff"), to!T("ö"), CaseSensitive.yes) == 0);
- S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
- S sMars = "Who\'s \'My Favorite Maritian?\'";
- assert(lastIndexOf(sMars, to!T("RiTE maR"), CaseSensitive.no) == 14, typeStr);
- assert(lastIndexOf(sPlts, to!T("FOuRTh"), CaseSensitive.no) == 10, typeStr);
- assert(lastIndexOf(sMars, to!T("whO\'s \'MY"), CaseSensitive.no) == 0, typeStr);
- assert(lastIndexOf(sMars, to!T(sMars), CaseSensitive.no) == 0, typeStr);
- }
- foreach (cs; EnumMembers!CaseSensitive)
- {
- enum csString = to!string(cs);
- assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), cs) == 4, csString);
- assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), cs) == 2, csString);
- assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), cs) == 1, csString);
- }
- }
- });
- }
- unittest{}//TODO: empty placeholder; presumably reserved for lastIndexOf tests on non-string ranges - confirm
- /++
- Returns the index of the last occurrence of $(D sub) in $(D s). If $(D sub)
- is not found, or $(D startIdx) is greater than the length of $(D s), then
- $(D -1) is returned. The $(D startIdx) slices $(D s) in the following way
- $(D s[0 .. startIdx]). For strings $(D startIdx) represents a codeunit
- index in $(D s). If the sequence ending at $(D startIdx) does not
- represent a well formed codepoint, then a $(XREF utf,UTFException) may be
- thrown.
- $(D cs) indicates whether the comparisons are case sensitive.
- +/
- ptrdiff_t lastIndexOf(S1, S2)(S1 s, S2 sub,
-         const size_t startIdx, CaseSensitive cs = CaseSensitive.yes) @safe pure
-     // Same requirements as lastIndexOf(s, sub, cs), which does the search.
-     // Requiring S2 to be a range keeps this overload from also matching
-     // lastIndexOf(s, c, startIdx).
-     if (isBidirectionalRange!S1 && isBidirectionalRange!S2
-         && (isSomeString!S1 || hasLength!S1))
- {
-     if (startIdx > s.length)
-         return -1;
-     static if (isSomeString!S1)
-     {
-         return lastIndexOf(s[0 .. startIdx], sub, cs);
-     }
-     else
-     {
-         // Keep only the first startIdx elements, then search those.
-         s.popBackExactly(s.length - startIdx);
-         return lastIndexOf(s, sub, cs);
-     }
- }
- unittest // lastIndexOf(s, sub[, startIdx][, cs]) for every pairing of string/wstring/dstring
- {
- debug(string) printf("string.lastIndexOf.unittest\n");
- foreach(S; TypeTuple!(string, wstring, dstring))
- {
- foreach(T; TypeTuple!(string, wstring, dstring))
- {
- enum typeStr = S.stringof ~ " " ~ T.stringof; // failure message naming the type pair under test
- assert(lastIndexOf(cast(S)null, to!T("a")) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 5) == 2, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 3) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6) == 4, typeStr ~
- format(" %u", lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6)));
- assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5) == 2, typeStr);
- assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd"), 3) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefcdefx"), to!T("x"), 1) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefcdefxy"), to!T("xy"), 6) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 8) == 8, typeStr); // empty needle is expected at startIdx
- assert(lastIndexOf(to!S("öafö"), to!T("ö"), 3) == 0, typeStr ~
- to!string(lastIndexOf(to!S("öafö"), to!T("ö"), 3))); //BUG 10472
- assert(lastIndexOf(cast(S)null, to!T("a"), 1, CaseSensitive.no) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5, CaseSensitive.no) == 2, typeStr);
- assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 4, CaseSensitive.no) == 2, typeStr ~
- " " ~ to!string(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 3, CaseSensitive.no)));
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"),3 , CaseSensitive.no) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefcdefXY"), to!T("xy"), 4, CaseSensitive.no) == -1, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 7, CaseSensitive.no) == 7, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 4, CaseSensitive.no) == 2, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 4, CaseSensitive.no) == 2, typeStr);
- assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), 6, CaseSensitive.no) == 3, typeStr);
- assert(lastIndexOf(to!S(""), to!T(""), 0) == lastIndexOf(to!S(""), to!T("")), typeStr); // with and without index must agree
- }
- foreach(cs; EnumMembers!CaseSensitive)
- {
- enum csString = to!string(cs);
- assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), 6, cs) == 4, csString);
- assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), 6, cs) == 2, csString);
- assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), 3, cs) == 1, csString);
- }
- }
- }
- /**
-  * Reinterprets a string as an array of unsigned integers: the result has
-  * the same type as the string, except that the character type is replaced
-  * by $(D ubyte), $(D ushort), or $(D uint) depending on the character
-  * width. The data is cast in place, not copied.
-  */
- auto representation(Char)(Char[] s) pure nothrow
-     if (isSomeChar!Char)
- {
-     // Unsigned integer of the same width as the character type.
-     static if (Char.sizeof == 1)
-         alias Unit = ubyte;
-     else static if (Char.sizeof == 2)
-         alias Unit = ushort;
-     else
-         alias Unit = uint;
-     // Carry over immutable/const ...
-     static if (is(Char == immutable))
-         alias Qualified = immutable(Unit);
-     else static if (is(Char == const))
-         alias Qualified = const(Unit);
-     else
-         alias Qualified = Unit;
-     // ... and, independently, shared (shared(const(T)) is possible).
-     static if (is(Char == shared))
-         alias Result = shared(Qualified);
-     else
-         alias Result = Qualified;
-     return cast(Result[]) s;
- }
- ///ditto
- alias Representation(Range)=typeof([Range.init.front].representation()); // type returned by representation() for a one-element array built from Range's front
- ///
- unittest // representation of a string is immutable(ubyte)[] over the very same memory
- {
- string s = "hello";
- static assert(is(typeof(representation(s)) == immutable(ubyte)[]));
- assert(representation(s) is cast(immutable(ubyte)[]) s); // identity, not just equality
- assert(representation(s) == [0x68, 0x65, 0x6c, 0x6c, 0x6f]);
- }
- unittest // representation for char/wchar/dchar under immutable, const, mutable, shared and const shared
- {
- assertCTFEable!(
- {
- void test(Char, T)(Char[] str) // checks both the static result type and the identity of the data
- {
- static assert(is(typeof(representation(str)) == T[]));
- assert(representation(str) is cast(T[]) str);
- }
- foreach (Type; TypeTuple!(Tuple!(char , ubyte ),
- Tuple!(wchar, ushort),
- Tuple!(dchar, uint )))
- {
- alias Char = FieldTypeTuple!Type[0];
- alias Int = FieldTypeTuple!Type[1];
- enum immutable(Char)[] hello = "hello";
- test!( immutable Char, immutable Int)(hello);
- test!( const Char, const Int)(hello);
- test!( Char, Int)(hello.dup);
- test!( shared Char, shared Int)(cast(shared) hello.dup);
- test!(const shared Char, const shared Int)(hello);
- }
- });
- }
- /++
- Returns a string which is identical to $(D s) except that all of its
- characters are converted to lowercase (by performing Unicode lowercase mapping).
- If none of $(D s) characters were affected, then $(D s) itself is returned.
- +/
- alias toLower = std.uni.toLower;
- /++
- Converts $(D s) to lowercase (by performing Unicode lowercase mapping) in place.
- For a few characters string length may increase after the transformation,
- in such a case the function reallocates exactly once.
- If $(D s) does not have any uppercase characters, then $(D s) is unaltered.
- +/
- alias toLowerInPlace = std.uni.toLowerInPlace;
- /++
- Returns a string which is identical to $(D s) except that all of its
- characters are converted to uppercase (by performing Unicode uppercase mapping).
- If none of $(D s) characters were affected, then $(D s) itself is returned.
- +/
- alias toUpper = std.uni.toUpper;
- /++
- Converts $(D s) to uppercase (by performing Unicode uppercase mapping) in place.
- For a few characters string length may increase after the transformation,
- in such a case the function reallocates exactly once.
- If $(D s) does not have any lowercase characters, then $(D s) is unaltered.
- +/
- alias toUpperInPlace = std.uni.toUpperInPlace;
- /++
-     Returns $(D s) with its first character converted to uppercase and all
-     remaining characters converted to lowercase. When no character needs
-     to change, $(D s) itself is returned instead of a copy.
-  +/
- S capitalize(S)(S s) @trusted pure
-     if (isSomeString!S)
- {
-     alias Unit = Unqual!(typeof(s[0]));
-     Unit[] rebuilt;
-     bool modified = false;
-     foreach (pos, dchar ch; s)
-     {
-         immutable dchar mapped = (pos == 0) ? std.uni.toUpper(ch)
-                                             : std.uni.toLower(ch);
-         if (mapped != ch && !modified)
-         {
-             modified = true;
-             // Everything before the first difference is copied verbatim;
-             // at position 0 there is nothing to copy.
-             if (pos != 0)
-                 rebuilt = s[0 .. pos].dup;
-         }
-         // Once a copy exists, every further character goes into it
-         if (modified)
-             std.utf.encode(rebuilt, mapped);
-     }
-     return modified ? cast(S) rebuilt : s;
- }
- unittest
- {
- // For every string type: changed inputs yield a new array, unchanged inputs
- // are returned as-is (same pointer), including non-ASCII case mappings.
- assertCTFEable!(
- {
- foreach (S; TypeTuple!(string, wstring, dstring, char[], wchar[], dchar[]))
- {
- S s1 = to!S("FoL");
- S s2;
- s2 = capitalize(s1);
- assert(cmp(s2, "Fol") == 0);
- assert(s2 !is s1);
- s2 = capitalize(s1[0 .. 2]);
- assert(cmp(s2, "Fo") == 0);
- assert(s2.ptr == s1.ptr);
- s1 = to!S("fOl");
- s2 = capitalize(s1);
- assert(cmp(s2, "Fol") == 0);
- assert(s2 !is s1);
- s1 = to!S("\u0131 \u0130");
- s2 = capitalize(s1);
- assert(cmp(s2, "I \u0130") == 0);
- assert(s2 !is s1);
- s1 = to!S("\u017F \u0049");
- s2 = capitalize(s1);
- assert(cmp(s2, "\u0053 \u0069") == 0);
- assert(s2 !is s1);
- }
- });
- }
- ///ditto
- struct Capitalize(R)
- {
- private:
-     // Source range with every element mapped to lowercase. The template
-     // argument must be parenthesised: `map!std.uni.toLower` would bind
-     // only `std` as the argument.
-     typeof(R.init.map!(std.uni.toLower)()) rg;
-     // True until the first element of the original range has been popped
-     bool isFirst = true;
- public:
-     // Forwards everything not defined here (e.g. empty) to the mapped range
-     alias rg this;
-     /// Wraps $(D r); elements are converted lazily as they are read.
-     this(R r) pure
-     {
-         rg = r.map!(std.uni.toLower)();
-     }
-     /// The current element: uppercase for the first element, lowercase afterwards.
-     @property auto front()
-     {
-         if (isFirst)
-             return std.uni.toUpper(rg.front);
-         else
-             return rg.front;
-     }
-     /// Advances the range; later elements are no longer uppercased.
-     void popFront()
-     {
-         isFirst = false;
-         rg.popFront();
-     }
-     static if (isRandomAccessRange!R)
-     {
-         /// Element $(D i); index 0 is uppercased only while it is still the
-         /// first element of the original range.
-         auto opIndex(size_t i)
-         {
-             if (i == 0 && isFirst)
-                 return std.uni.toUpper(rg[0]);
-             else
-                 return rg[i];
-         }
-     }
-     static if (hasSlicing!R)
-     {
-         /// Sub-range $(D [i, j$(RPAREN)); it keeps the uppercase first element
-         /// only when it starts at the original first element.
-         Capitalize!R opSlice(size_t i, size_t j)
-         {
-             Capitalize!R result;
-             result.rg = rg[i .. j];
-             result.isFirst = isFirst && i == 0;
-             return result;
-         }
-     }
- }
- ///ditto
- Capitalize!R capitalize(R)(R r) @safe pure
-     if (isInputRange!R && !isSomeString!R)
- {
-     // Strings are excluded here: they also satisfy isInputRange, which made
-     // calls with a string ambiguous with the isSomeString overload.
-     return typeof(return)(r);
- }
- unittest{}//TODO
- /++
- Split $(D s) into an array of lines using $(D '\r'), $(D '\n'),
- $(D "\r\n"), $(XREF uni, lineSep), and $(XREF uni, paraSep) as delimiters.
- If $(D keepTerm) is set to $(D KeepTerminator.yes), then the delimiter
- is included in the strings returned.
- +/
- enum KeepTerminator : bool { no, yes } // bool-backed flag: no == false, yes == true
- /// ditto
- S[] splitLines(S)(S s, KeepTerminator keepTerm = KeepTerminator.no) @safe pure
-     if (isSomeString!S)
- {
-     auto lines = appender!(S[])();
-     immutable bool keep = (keepTerm == KeepTerminator.yes);
-     size_t lineStart = 0; // code-unit index where the pending line begins
-     size_t cursor = 0;    // code-unit index of the next undecoded character
-     while (cursor < s.length)
-     {
-         immutable termPos = cursor;
-         immutable dchar ch = decode(s, cursor);
-         if (ch != '\r' && ch != '\n' && ch != lineSep && ch != paraSep)
-             continue;
-         // "\r\n" is a single terminator: swallow the '\n' as well
-         if (ch == '\r' && cursor < s.length && s[cursor] == '\n')
-             ++cursor;
-         // With keep, the slice runs through the terminator; otherwise it stops before it
-         lines.put(s[lineStart .. (keep ? cursor : termPos)]);
-         lineStart = cursor;
-     }
-     // Text after the last terminator forms a final line
-     if (lineStart != cursor)
-         lines.put(s[lineStart .. $]);
-     return lines.data;
- }
- unittest
- {
- // Exercises every terminator kind, with and without KeepTerminator.yes,
- // and an input whose last line has no terminator.
- debug(string) printf("string.splitLines.unittest\n");
- assertCTFEable!(
- {
- foreach (S; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
- {
- auto s = to!S("\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\n");
- auto lines = splitLines(s);
- assert(lines.length == 9);
- assert(lines[0] == "");
- assert(lines[1] == "peter");
- assert(lines[2] == "");
- assert(lines[3] == "paul");
- assert(lines[4] == "jerry");
- assert(lines[5] == "ice");
- assert(lines[6] == "cream");
- assert(lines[7] == "");
- assert(lines[8] == "sunday");
- lines = splitLines(s, KeepTerminator.yes);
- assert(lines.length == 9);
- assert(lines[0] == "\r");
- assert(lines[1] == "peter\n");
- assert(lines[2] == "\r");
- assert(lines[3] == "paul\r\n");
- assert(lines[4] == "jerry\u2028");
- assert(lines[5] == "ice\u2029");
- assert(lines[6] == "cream\n");
- assert(lines[7] == "\n");
- assert(lines[8] == "sunday\n");
- s.popBack(); // Lop-off trailing \n
- lines = splitLines(s);
- assert(lines.length == 9);
- assert(lines[8] == "sunday");
- lines = splitLines(s, KeepTerminator.yes);
- assert(lines.length == 9);
- assert(lines[8] == "sunday");
- }
- });
- }
- /+
- ///ditto
- auto splitLines(R)(R r,KeepTerminator keepTerm=KeepTerminator.no)@safe pure
- if(isInputRange!R&&hasLvalueElements!R)
- {
- alias Ret=typeof(return);
- immutable nl="\n";//TODO:newline based on os' own newline
- static auto sanitize(Ror)(Ror ror,bool keep)
- {
- if(keep){
- static if(isBidirectionalRange!Ror&& isBidirectionalRange!Ret)
- return ror.filterBidirectional!(a=>a!="")();
- else static if(!isBidirectionalRange!Ret)
- return ror.filter!(a=>a!="")();
- else static assert(0,"can't give a bidirectional range from a non-bidirectional one")
- }
- else
- {
- if(hasLvalueElements!Ror)assert(0);
- typeof(ror.front)*prev;
- foreach(el;ror)
- {
- if(el.empty)*prev~=nl;
- else prev=&el;
- }
- return ror;
- }
- }
- return r.splitter(nl).sanitize(keepTerm==KeepTerminator.yes);
- }
- unittest{}//TODO
- +/
- /++
-     Strips leading whitespace (as defined by $(XREF uni, isWhite)).
-     Returns: $(D str) stripped of leading whitespace.
-     Postconditions: $(D str) and the returned value
-     will share the same tail (see $(XREF array, sameTail)).
-  +/
- auto stripLeft(S)(S str) @safe pure
-     if (isInputRange!S && isSomeChar!(ElementType!S))
- {
-     // Written as an explicit loop: `str.stripLeft!std.uni.isWhite()` binds only
-     // `std` as the template argument and names this non-predicate template.
-     while (!str.empty && std.uni.isWhite(str.front))
-         str.popFront();
-     return str;
- }
- ///
- @safe pure unittest
- {
- // Documented examples: only leading whitespace is removed; trailing whitespace stays.
- assert(stripLeft(" hello world ") ==
- "hello world ");
- assert(stripLeft("\n\t\v\rhello world\n\t\v\r") ==
- "hello world\n\t\v\r");
- assert(stripLeft("hello world") ==
- "hello world");
- assert(stripLeft([lineSep] ~ "hello world" ~ lineSep) ==
- "hello world" ~ [lineSep]);
- assert(stripLeft([paraSep] ~ "hello world" ~ paraSep) ==
- "hello world" ~ [paraSep]);
- }
- /++
-     Strips trailing whitespace (as defined by $(XREF uni, isWhite)).
-     Returns: $(D str) stripped of trailing whitespace.
-     Postconditions: $(D str) and the returned value
-     will share the same head (see $(XREF array, sameHead)).
-  +/
- auto stripRight(S)(S str) @safe pure
-     if (isBidirectionalRange!S && isSomeChar!(ElementType!S))
- {
-     // Written as an explicit loop: `str.stripRight!std.uni.isWhite()` binds only
-     // `std` as the template argument and names this non-predicate template.
-     while (!str.empty && std.uni.isWhite(str.back))
-         str.popBack();
-     return str;
- }
- ///
- @safe pure unittest
- {
- // Documented examples: only trailing whitespace is removed; leading whitespace stays.
- assert(stripRight(" hello world ") ==
- " hello world");
- assert(stripRight("\n\t\v\rhello world\n\t\v\r") ==
- "\n\t\v\rhello world");
- assert(stripRight("hello world") ==
- "hello world");
- assert(stripRight([lineSep] ~ "hello world" ~ lineSep) ==
- [lineSep] ~ "hello world");
- assert(stripRight([paraSep] ~ "hello world" ~ paraSep) ==
- [paraSep] ~ "hello world");
- }
- /++
-     Strips both leading and trailing whitespace (as defined by
-     $(XREF uni, isWhite)).
-     Returns: $(D str) stripped of leading and trailing whitespace.
-  +/
- auto strip(S)(S str) @safe pure
-     if (isBidirectionalRange!S && isSomeChar!(ElementType!S))
- {
-     // Trim the front first, then the back of what remains
-     auto withoutLeading = stripLeft(str);
-     return stripRight(withoutLeading);
- }
- ///
- @safe pure unittest
- {
- // Documented examples: whitespace is removed from both ends, interior whitespace stays.
- assert(strip(" hello world ") ==
- "hello world");
- assert(strip("\n\t\v\rhello world\n\t\v\r") ==
- "hello world");
- assert(strip("hello world") ==
- "hello world");
- assert(strip([lineSep] ~ "hello world" ~ [lineSep]) ==
- "hello world");
- assert(strip([paraSep] ~ "hello world" ~ [paraSep]) ==
- "hello world");
- }
- unittest
- {
- // Runs stripLeft/stripRight/strip over every mutable, const and immutable
- // string type, including Unicode whitespace and non-whitespace code points.
- debug(string) printf("string.strip.unittest\n");
- assertCTFEable!(
- {
- foreach (S; TypeTuple!( char[], const char[], string,
- wchar[], const wchar[], wstring,
- dchar[], const dchar[], dstring))
- {
- assert(equal(stripLeft(to!S(" foo\t ")), "foo\t "));
- assert(equal(stripLeft(to!S("\u2008 foo\t \u2007")), "foo\t \u2007"));
- assert(equal(stripLeft(to!S("\u0085 μ \u0085 \u00BB \r")), "μ \u0085 \u00BB \r"));
- assert(equal(stripLeft(to!S("1")), "1"));
- assert(equal(stripLeft(to!S("\U0010FFFE")), "\U0010FFFE"));
- assert(equal(stripLeft(to!S("")), ""));
- assert(equal(stripRight(to!S(" foo\t ")), " foo"));
- assert(equal(stripRight(to!S("\u2008 foo\t \u2007")), "\u2008 foo"));
- assert(equal(stripRight(to!S("\u0085 μ \u0085 \u00BB \r")), "\u0085 μ \u0085 \u00BB"));
- assert(equal(stripRight(to!S("1")), "1"));
- assert(equal(stripRight(to!S("\U0010FFFE")), "\U0010FFFE"));
- assert(equal(stripRight(to!S("")), ""));
- assert(equal(strip(to!S(" foo\t ")), "foo"));
- assert(equal(strip(to!S("\u2008 foo\t \u2007")), "foo"));
- assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB \r")), "μ \u0085 \u00BB"));
- assert(equal(strip(to!S("\U0010FFFE")), "\U0010FFFE"));
- assert(equal(strip(to!S("")), ""));
- }
- });
- }
- @safe pure unittest
- {
- // The documented postconditions: results share the tail (stripLeft) or
- // head (stripRight) of the input even when everything is stripped.
- assertCTFEable!(
- {
- wstring s = " ";
- assert(s.sameTail(s.stripLeft()));
- assert(s.sameHead(s.stripRight()));
- });
- }
- /++
-     If $(D str) ends with $(D delimiter), then $(D str) is returned without
-     $(D delimiter) on its end. If $(D str) does $(I not) end with
-     $(D delimiter), then it is returned unchanged.
-     If no $(D delimiter) is given, then one trailing $(D '\r'), $(D '\n'),
-     $(D "\r\n"), $(XREF uni, lineSep), or $(XREF uni, paraSep) is removed from
-     the end of $(D str). If $(D str) does not end with any of those characters,
-     then it is returned unchanged.
-  +/
- S chomp(S)(S str) @safe pure
-     if (isBidirectionalRange!S && isSomeChar!(ElementType!S))
- {
-     if (str.empty)
-         return str;
-     switch (str.back)
-     {
-         case '\n':
-         {
-             str.popBack();
-             // A lone "\n" leaves nothing further to inspect
-             if (str.empty || str.back != '\r')
-                 return str;
-             // "\r\n": fall into the '\r' case to drop the '\r' too
-             goto case;
-         }
-         case '\r':
-         {
-             str.popBack();
-             return str;
-         }
-         //Pops off the last character if it's lineSep or paraSep.
-         static if (is(ElementType!S : const char) && isForwardRange!S)
-         {
-             //In UTF-8, lineSep and paraSep are [226, 128, 168], and
-             //[226, 128, 169] respectively, so their first two bytes are the same.
-             case 168: //Last byte of lineSep
-             case 169: //Last byte of paraSep
-             {
-                 // Saved so a partial match can be undone
-                 S orig = str.save;
-                 str.popBack();
-                 if (!str.empty && str.back == 128)
-                 {
-                     str.popBack();
-                     if (!str.empty && str.back == 226)
-                     {
-                         str.popBack();
-                         return str;
-                     }
-                 }
-                 return orig;
-             }
-         }
-         else
-         {
-             case lineSep:
-             case paraSep:
-                 goto case '\r';
-         }
-         default:
-             return str;
-     }
- }
- /// Ditto
- S1 chomp(S1, S2)(S1 str, S2 delimiter) @safe pure
-     if (isSomeChar!(ElementType!S1) && isSomeChar!(ElementType!S2) &&
-         isBidirectionalRange!S1 && isBidirectionalRange!S2 &&
-         is(Unqual!(ElementType!S1) == Unqual!(ElementType!S2)) && hasLength!S2)
- {
-     // An empty delimiter selects the line-terminator behaviour of the
-     // one-argument overload
-     if (delimiter.empty)
-         return chomp(str);
-     // Same element type and a known length: drop exactly that many elements
-     if (str.endsWith(delimiter))
-         str.popBackExactly(delimiter.length);
-     return str;
- }
- /// Ditto
- S1 chomp(S1, S2)(S1 str, S2 delimiter) @safe pure
-     if (isSomeChar!(ElementType!S1) && isSomeChar!(ElementType!S2) &&
-         isBidirectionalRange!S1 && isBidirectionalRange!S2 && isForwardRange!S1 &&
-         (!is(Unqual!(ElementType!S1) == Unqual!(ElementType!S2)) || !hasLength!S2))
- {
-     // An empty delimiter selects the line-terminator behaviour of the
-     // one-argument overload
-     if (delimiter.empty)
-         return chomp(str);
-     // Saved so a partial match can be undone
-     auto orig = str.save;
-     // Compare from the back, one decoded character at a time
-     foreach_reverse (dchar c; delimiter)
-     {
-         if (str.empty || str.back != c)
-         {
-             return orig;
-         }
-         str.popBack();
-     }
-     return str;
- }
- ///
- @safe pure unittest
- {
- // Documented examples for both the terminator form and the delimiter form.
- assert(chomp(" hello world \n\r") == " hello world \n");
- assert(chomp(" hello world \r\n") == " hello world ");
- assert(chomp(" hello world \n\n") == " hello world \n");
- assert(chomp(" hello world \n\n ") == " hello world \n\n ");
- assert(chomp(" hello world \n\n" ~ [lineSep]) == " hello world \n\n");
- assert(chomp(" hello world \n\n" ~ [paraSep]) == " hello world \n\n");
- assert(chomp(" hello world") == " hello world");
- assert(chomp("") == "");
- assert(chomp(" hello world", "orld") == " hello w");
- assert(chomp(" hello world", " he") == " hello world");
- assert(chomp("", "hello") == "");
- // Don't decode pointlessly
- assert(chomp("hello\xFE", "\r") == "hello\xFE");
- }
- unittest
- {
- // Runs chomp over every pairing of string types for input and delimiter,
- // including null inputs and multi-code-unit characters.
- debug(string) printf("string.chomp.unittest\n");
- string s;
- assertCTFEable!(
- {
- foreach (S; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
- {
- // @@@ BUG IN COMPILER, MUST INSERT CAST
- assert(chomp(cast(S)null) is null);
- assert(chomp(to!S("hello")) == "hello");
- assert(chomp(to!S("hello\n")) == "hello");
- assert(chomp(to!S("hello\r")) == "hello");
- assert(chomp(to!S("hello\r\n")) == "hello");
- assert(chomp(to!S("hello\n\r")) == "hello\n");
- assert(chomp(to!S("hello\n\n")) == "hello\n");
- assert(chomp(to!S("hello\r\r")) == "hello\r");
- assert(chomp(to!S("hello\nxxx\n")) == "hello\nxxx");
- assert(chomp(to!S("hello\u2028")) == "hello");
- assert(chomp(to!S("hello\u2029")) == "hello");
- assert(chomp(to!S("hello\u2028\u2028")) == "hello\u2028");
- assert(chomp(to!S("hello\u2029\u2029")) == "hello\u2029");
- foreach (T; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
- {
- // @@@ BUG IN COMPILER, MUST INSERT CAST
- assert(chomp(cast(S)null, cast(T)null) is null);
- assert(chomp(to!S("hello\n"), cast(T)null) == "hello");
- assert(chomp(to!S("hello"), to!T("o")) == "hell");
- assert(chomp(to!S("hello"), to!T("p")) == "hello");
- // @@@ BUG IN COMPILER, MUST INSERT CAST
- assert(chomp(to!S("hello"), cast(T) null) == "hello");
- assert(chomp(to!S("hello"), to!T("llo")) == "he");
- assert(chomp(to!S("\uFF28ello"), to!T("llo")) == "\uFF28e");
- assert(chomp(to!S("\uFF28el\uFF4co"), to!T("l\uFF4co")) == "\uFF28e");
- }
- }
- });
- }
- /++
-     If $(D str) starts with $(D delimiter), then the part of $(D str) following
-     $(D delimiter) is returned. If $(D str) does $(I not) start with
-     $(D delimiter), then it is returned unchanged.
-  +/
- S1 chompPrefix(S1, S2)(S1 str, S2 delimiter) @safe pure
-     if (isInputRange!S1 && isInputRange!S2 && hasLength!S2 &&
-         is(Unqual!(ElementType!S1) == Unqual!(ElementType!S2)))
- {
-     // Same element type and a known length: drop exactly that many elements
-     if (str.startsWith(delimiter))
-         str.popFrontExactly(delimiter.length);
-     return str;
- }
- ///ditto
- S1 chompPrefix(S1, S2)(S1 str, S2 delimiter) @safe pure
-     if (isSomeChar!(ElementType!S1) && isSomeChar!(ElementType!S2) &&
-         isInputRange!S2 && isForwardRange!S1 &&
-         (!is(Unqual!(ElementType!S1) == Unqual!(ElementType!S2)) || !hasLength!S2))
- {
-     // Saved so a partial match can be undone
-     S1 orig = str.save;
-     // Walk both ranges element by element. Popping by a code-unit index would
-     // remove too much, because popping a narrow string removes whole code points.
-     foreach (dchar c; delimiter)
-     {
-         if (str.empty || str.front != c)
-             return orig;
-         str.popFront();
-     }
-     return str;
- }
- ///
- @safe pure unittest
- {
- // Documented examples: only a match at the very start is removed.
- assert(chompPrefix("hello world", "he") == "llo world");
- assert(chompPrefix("hello world", "hello w") == "orld");
- assert(chompPrefix("hello world", " world") == "hello world");
- assert(chompPrefix("", "hello") == "");
- }
- /* @safe */ pure unittest
- {
- // Runs chompPrefix over every pairing of string types, including prefixes
- // longer than the input and multi-code-unit characters.
- assertCTFEable!(
- {
- foreach (S; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
- {
- foreach (T; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
- {
- assert(equal(chompPrefix(to!S("abcdefgh"), to!T("abcde")), "fgh"));
- assert(equal(chompPrefix(to!S("abcde"), to!T("abcdefgh")), "abcde"));
- assert(equal(chompPrefix(to!S("\uFF28el\uFF4co"), to!T("\uFF28el\uFF4co")), ""));
- assert(equal(chompPrefix(to!S("\uFF28el\uFF4co"), to!T("\uFF28el")), "\uFF4co"));
- assert(equal(chompPrefix(to!S("\uFF28el"), to!T("\uFF28el\uFF4co")), "\uFF28el"));
- }
- }
- });
- }
- /++
-     Returns $(D str) without its last character, if there is one. If $(D str)
-     ends with $(D "\r\n"), then both are removed. If $(D str) is empty,
-     then it is returned unchanged.
-  +/
- S chop(S)(S str) @safe pure
-     if (isBidirectionalRange!S && isSomeChar!(ElementType!S))
- {
-     if (str.empty)
-         return str;
-     immutable last = str.back;
-     str.popBack();
-     // A lone "\n" leaves an empty range, so check before reading back again
-     if (last == '\n' && !str.empty && str.back == '\r')
-         str.popBack();
-     return str;
- }
- ///
- @safe pure unittest
- {
- // Documented examples: one character is removed, or two for a trailing "\r\n".
- assert(chop("hello world") == "hello worl");
- assert(chop("hello world\n") == "hello world");
- assert(chop("hello world\r") == "hello world");
- assert(chop("hello world\n\r") == "hello world\n");
- assert(chop("hello world\r\n") == "hello world");
- assert(chop("Walter Bright") == "Walter Brigh");
- assert(chop("") == "");
- }
- unittest
- {
- // Runs chop over every string type, including null input and
- // multi-code-unit final characters.
- debug(string) printf("string.chop.unittest\n");
- assertCTFEable!(
- {
- foreach (S; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
- {
- assert(chop(cast(S) null) is null);
- assert(equal(chop(to!S("hello")), "hell"));
- assert(equal(chop(to!S("hello\r\n")), "hello"));
- assert(equal(chop(to!S("hello\n\r")), "hello\n"));
- assert(equal(chop(to!S("Verité")), "Verit"));
- assert(equal(chop(to!S(`さいごの果実`)), "さいごの果"));
- assert(equal(chop(to!S(`ミツバチと科学者`)), "ミツバチと科学"));
- }
- });
- }
- /++
-     Pads $(D s) on the right with $(D fillChar) until it is $(D width)
-     characters wide. If $(D s) already has at least $(D width) characters,
-     it is returned as-is.
-  +/
- S leftJustify(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure
-     if (isSomeString!S)
- {
-     alias Unit = ElementEncodingType!S;
-     if (!canSearchInCodeUnits!Unit(fillChar))
-     {
-         // Slow path: work on UTF-32 and convert back at the end
-         auto wide = to!dstring(s);
-         if (width <= wide.length)
-             return s;
-         auto widePadded = new dchar[](width);
-         widePadded[0 .. wide.length] = wide[];
-         widePadded[wide.length .. $] = fillChar;
-         return to!S(widePadded);
-     }
-     // Fast path: the fill character is written as a single code unit
-     immutable points = s.walkLength();
-     if (width <= points)
-         return s;
-     auto padded = new Unqual!(Unit)[width - points + s.length];
-     padded[0 .. s.length] = s[];
-     padded[s.length .. $] = cast(Unit) fillChar;
-     return cast(S) padded;
- }
- /++
-     Pads $(D s) on the left with $(D fillChar) until it is $(D width)
-     characters wide. If $(D s) already has at least $(D width) characters,
-     it is returned as-is.
-  +/
- S rightJustify(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure
-     if (isSomeString!S)
- {
-     alias Unit = ElementEncodingType!S;
-     if (canSearchInCodeUnits!Unit(fillChar))
-     {
-         // Fast path: the fill character is written as a single code unit
-         immutable points = s.walkLength();
-         if (width <= points)
-             return s;
-         immutable padUnits = width - points;
-         auto padded = new Unqual!Unit[padUnits + s.length];
-         padded[0 .. padUnits] = cast(Unit) fillChar;
-         padded[padUnits .. $] = s[];
-         return cast(S) padded;
-     }
-     else
-     {
-         // Slow path: work on UTF-32 and convert back at the end
-         auto wide = to!dstring(s);
-         if (width <= wide.length)
-             return s;
-         immutable padCount = width - wide.length;
-         auto padded = new dchar[](width);
-         padded[0 .. padCount] = fillChar;
-         padded[padCount .. $] = wide[];
-         return to!S(padded);
-     }
- }
- /++
-     Pads $(D s) on both sides with $(D fillChar) until it is $(D width)
-     characters wide. When the padding cannot be split evenly, the extra
-     fill character goes on the right. If $(D s) already has at least
-     $(D width) characters, it is returned as-is.
-  +/
- S center(S)(S s, size_t width, dchar fillChar = ' ') @trusted pure
-     if (isSomeString!S)
- {
-     alias Unit = ElementEncodingType!S;
-     if (canSearchInCodeUnits!Unit(fillChar))
-     {
-         // Fast path: the fill character is written as a single code unit
-         immutable points = s.walkLength();
-         if (width <= points)
-             return s;
-         auto padded = new Unqual!Unit[width - points + s.length];
-         immutable leftPad = (padded.length - s.length) / 2;
-         immutable textEnd = leftPad + s.length;
-         padded[0 .. leftPad] = cast(Unit) fillChar;
-         padded[leftPad .. textEnd] = s[];
-         padded[textEnd .. $] = cast(Unit) fillChar;
-         return to!S(padded);
-     }
-     else
-     {
-         // Slow path: work on UTF-32 and convert back at the end
-         auto wide = to!dstring(s);
-         if (width <= wide.length)
-             return s;
-         auto padded = new dchar[](width);
-         immutable leftPad = (padded.length - wide.length) / 2;
-         immutable textEnd = leftPad + wide.length;
-         padded[0 .. leftPad] = fillChar;
-         padded[leftPad .. textEnd] = wide[];
-         padded[textEnd .. $] = fillChar;
-         return to!S(padded);
-     }
- }
- unittest
- {
- // Covers leftJustify, rightJustify and center for every string type, with
- // widths below and above the input length and with non-ASCII fill characters.
- // NOTE(review): runs of spaces inside the expected literals look collapsed
- // in this copy of the file -- verify against the upstream source.
- debug(string) printf("string.justify.unittest\n");
- assertCTFEable!(
- {
- foreach (S; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
- {
- S s = to!S("hello");
- assert(leftJustify(s, 2) == "hello");
- assert(rightJustify(s, 2) == "hello");
- assert(center(s, 2) == "hello");
- assert(leftJustify(s, 7) == "hello ");
- assert(rightJustify(s, 7) == " hello");
- assert(center(s, 7) == " hello ");
- assert(leftJustify(s, 8) == "hello ");
- assert(rightJustify(s, 8) == " hello");
- assert(center(s, 8) == " hello ");
- assert(leftJustify(s, 8, '\u0100') == "hello\u0100\u0100\u0100");
- assert(rightJustify(s, 8, '\u0100') == "\u0100\u0100\u0100hello");
- assert(center(s, 8, '\u0100') == "\u0100hello\u0100\u0100");
- assert(leftJustify(s, 8, 'ö') == "helloööö");
- assert(rightJustify(s, 8, 'ö') == "öööhello");
- assert(center(s, 8, 'ö') == "öhelloöö");
- }
- });
- }
- /++
- Replace each tab character in $(D s) with the number of spaces necessary
- to align the following character at the next tab stop where $(D tabSize)
- is the distance between tab stops.
- Returns $(D s) itself when it contains no tab; otherwise a newly built array.
- +/
- S detab(S)(S s, size_t tabSize = 8) @trusted pure
- if (isSomeString!S)
- {
- assert(tabSize > 0);
- alias C = Unqual!(typeof(s[0]));
- // Set once the first tab is seen; until then nothing is copied
- bool changes = false;
- C[] result;
- // Characters emitted so far on the current line
- int column;
- size_t nspaces;
- foreach (size_t i, dchar c; s)
- {
- switch (c)
- {
- case '\t':
- // Spaces needed to reach the next multiple of tabSize
- nspaces = tabSize - (column % tabSize);
- if (!changes)
- {
- changes = true;
- result = null;
- // Length is first set to the input size plus this expansion and then
- // cut back to what has been produced so far
- // (presumably to reserve capacity -- TODO confirm)
- result.length = s.length + nspaces - 1;
- result.length = i + nspaces;
- result[0 .. i] = s[0 .. i];
- result[i .. i + nspaces] = ' ';
- }
- else
- {
- ptrdiff_t j = result.length;
- result.length = j + nspaces;
- result[j .. j + nspaces] = ' ';
- }
- column += nspaces;
- break;
- case '\r':
- case '\n':
- case paraSep:
- case lineSep:
- // A line terminator restarts column counting; it is copied through L1 below
- column = 0;
- goto L1;
- default:
- column++;
- L1:
- // Shared tail: copy the character only once a result array exists
- if (changes)
- {
- std.utf.encode(result, c);
- }
- break;
- }
- }
- return changes ? cast(S) result : s;
- }
- unittest
- {
- // Covers null and empty input, tab-free input, custom tab sizes and a
- // character outside the Basic Multilingual Plane.
- // NOTE(review): runs of spaces inside the expected literals look collapsed
- // in this copy of the file -- verify against the upstream source.
- debug(string) printf("string.detab.unittest\n");
- assertCTFEable!(
- {
- foreach (S; TypeTuple!(char[], wchar[], dchar[], string, wstring, dstring))
- {
- S s = to!S("This \tis\t a fofof\tof list");
- assert(cmp(detab(s), "This is a fofof of list") == 0);
- assert(detab(cast(S)null) is null);
- assert(detab("").empty);
- assert(detab("a") == "a");
- assert(detab("\t") == " ");
- assert(detab("\t", 3) == " ");
- assert(detab("\t", 9) == " ");
- assert(detab( " ab\t asdf ") == " ab asdf ");
- assert(detab( " \U00010000b\tasdf ") == " \U00010000b asdf ");
- }
- });
- }
- /++
-     Replaces spaces in $(D s) with the optimal number of tabs.
-     All spaces and tabs at the end of a line are removed.
-     Column and whitespace tracking restart after every line terminator.
-     Params:
-     s = String to convert.
-     tabSize = Tab columns are $(D tabSize) spaces apart.
-     Returns: $(D s), or a slice of it, when no rewriting was needed;
-     otherwise a newly built string.
-  +/
- S entab(S)(S s, size_t tabSize = 8) @trusted pure
-     if (isSomeString!S)
- {
-     bool changes = false;
-     alias C = Unqual!(typeof(s[0]));
-     C[] result;
-     int nspaces = 0;   // length of the current run of spaces
-     int nwhite = 0;    // code units of trailing spaces/tabs on the current line
-     size_t column = 0; // column number
-     foreach (size_t i, dchar c; s)
-     {
-         // Start building a result: copy everything before position i
-         void change()
-         {
-             changes = true;
-             result = null;
-             result.length = s.length;
-             result.length = i;
-             result[0 .. i] = s[0 .. i];
-         }
-         switch (c)
-         {
-             case '\t':
-                 nwhite++;
-                 if (nspaces)
-                 {
-                     // Spaces directly before a tab collapse into tabs
-                     if (!changes)
-                         change();
-                     ptrdiff_t j = result.length - nspaces;
-                     auto ntabs = (((column - nspaces) % tabSize) + nspaces) / tabSize;
-                     result.length = j + ntabs;
-                     result[j .. j + ntabs] = '\t';
-                     nwhite += ntabs - nspaces;
-                     nspaces = 0;
-                 }
-                 column = (column + tabSize) / tabSize * tabSize;
-                 break;
-             case '\r':
-             case '\n':
-             case paraSep:
-             case lineSep:
-                 // Truncate any trailing spaces or tabs
-                 if (nwhite)
-                 {
-                     if (!changes)
-                         change();
-                     result = result[0 .. result.length - nwhite];
-                 }
-                 // A terminator starts a fresh line. Without this reset, stale
-                 // counts truncated already-emitted terminators and tab stops
-                 // drifted on later lines.
-                 nwhite = 0;
-                 nspaces = 0;
-                 column = 0;
-                 break;
-             default:
-                 if (nspaces >= 2 && (column % tabSize) == 0)
-                 {
-                     // The run of spaces ends exactly on a tab stop
-                     if (!changes)
-                         change();
-                     auto j = result.length - nspaces;
-                     auto ntabs = (nspaces + tabSize - 1) / tabSize;
-                     result.length = j + ntabs;
-                     result[j .. j + ntabs] = '\t';
-                     nwhite += ntabs - nspaces;
-                     nspaces = 0;
-                 }
-                 if (c == ' ')
-                 {
-                     nwhite++;
-                     nspaces++;
-                 }
-                 else
-                 {
-                     nwhite = 0;
-                     nspaces = 0;
-                 }
-                 column++;
-                 break;
-         }
-         if (changes)
-         {
-             std.utf.encode(result, c);
-         }
-     }
-     // Truncate any trailing spaces or tabs
-     if (nwhite)
-     {
-         if (changes)
-             result = result[0 .. result.length - nwhite];
-         else
-             s = s[0 .. s.length - nwhite];
-     }
-     return changes ? assumeUnique(result) : s;
- }
- unittest
- {
- // Covers null and empty input, trailing-whitespace removal, space runs
- // before tabs, line terminators and wide characters.
- // NOTE(review): runs of spaces inside the literals look collapsed in this
- // copy of the file (several asserts are now textually identical) -- verify
- // against the upstream source.
- debug(string) printf("string.entab.unittest\n");
- assertCTFEable!(
- {
- assert(entab(cast(string) null) is null);
- assert(entab("").empty);
- assert(entab("a") == "a");
- assert(entab(" ") == "");
- assert(entab(" x") == "\tx");
- assert(entab(" ab asdf ") == " ab\tasdf");
- assert(entab(" ab asdf ") == " ab\t asdf");
- assert(entab(" ab \t asdf ") == " ab\t asdf");
- assert(entab("1234567 \ta") == "1234567\t\ta");
- assert(entab("1234567 \ta") == "1234567\t\ta");
- assert(entab("1234567 \ta") == "1234567\t\ta");
- assert(entab("1234567 \ta") == "1234567\t\ta");
- assert(entab("1234567 \ta") == "1234567\t\ta");
- assert(entab("1234567 \ta") == "1234567\t\ta");
- assert(entab("1234567 \ta") == "1234567\t\ta");
- assert(entab("1234567 \ta") == "1234567\t\ta");
- assert(entab("1234567 \ta") == "1234567\t\t\ta");
- assert(entab("a ") == "a");
- assert(entab("a\v") == "a\v");
- assert(entab("a\f") == "a\f");
- assert(entab("a\n") == "a\n");
- assert(entab("a\n\r") == "a\n\r");
- assert(entab("a\r\n") == "a\r\n");
- assert(entab("a\u2028") == "a\u2028");
- assert(entab("a\u2029") == "a\u2029");
- assert(entab("a ") == "a");
- assert(entab("a\t") == "a");
- assert(entab("\uFF28\uFF45\uFF4C\uFF4C567 \t\uFF4F \t") ==
- "\uFF28\uFF45\uFF4C\uFF4C567\t\t\uFF4F");
- });
- }
- /++
-     Builds a copy of $(D str) in which every character that is a key of
-     $(D transTable) is replaced by the associated value. $(D transTable)
-     is an AA whose keys are $(D dchar) and whose values are either
-     $(D dchar) or some type of string. Characters listed in $(D toRemove)
-     are dropped and are not looked up in $(D transTable). $(D str) itself
-     is left unaltered.
-     See_Also:
-     $(LREF tr)
-     $(XREF array, replace)
-     Params:
-     str = The original string.
-     transTable = The AA indicating which characters to replace and what to
-     replace them with.
-     toRemove = The characters to remove from the string.
-  +/
- C1[] translate(C1, C2 = immutable char)(C1[] str,
-                                         dchar[dchar] transTable,
-                                         const(C2)[] toRemove = null) @safe pure
-     if (isSomeChar!C1 && isSomeChar!C2)
- {
-     // Collect the output in an appender and hand back what it accumulated
-     auto sink = appender!(C1[])();
-     translateImpl(str, transTable, toRemove, sink);
-     return sink.data;
- }
- ///
- @safe pure unittest
- {
- // Documented examples: a replacement is not translated again ('e' -> '5'
- // stays '5'), and removed characters are dropped before lookup.
- dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q'];
- assert(translate("hello world", transTable1) == "h5ll7 w7rld");
- assert(translate("hello world", transTable1, "low") == "h5 rd");
- string[dchar] transTable2 = ['e' : "5", 'o' : "orange"];
- assert(translate("hello world", transTable2) == "h5llorange worangerld");
- }
- /* @safe */ pure unittest
- {
- // dchar[dchar] tables over every input string type and every toRemove
- // string type, plus a check that the result type equals the input type.
- assertCTFEable!(
- {
- foreach (S; TypeTuple!( char[], const( char)[], immutable( char)[],
- wchar[], const(wchar)[], immutable(wchar)[],
- dchar[], const(dchar)[], immutable(dchar)[]))
- {
- assert(translate(to!S("hello world"), cast(dchar[dchar])['h' : 'q', 'l' : '5']) ==
- to!S("qe55o wor5d"));
- assert(translate(to!S("hello world"), cast(dchar[dchar])['o' : 'l', 'l' : '\U00010143']) ==
- to!S("he\U00010143\U00010143l wlr\U00010143d"));
- assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['h' : 'q', 'l': '5']) ==
- to!S("qe55o \U00010143 wor5d"));
- assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['o' : '0', '\U00010143' : 'o']) ==
- to!S("hell0 o w0rld"));
- assert(translate(to!S("hello world"), cast(dchar[dchar])null) == to!S("hello world"));
- foreach (T; TypeTuple!( char[], const( char)[], immutable( char)[],
- wchar[], const(wchar)[], immutable(wchar)[],
- dchar[], const(dchar)[], immutable(dchar)[]))
- {
- assert(translate(to!S("hello world"),
- cast(dchar[dchar])['h' : 'q', 'l' : '5'],
- to!T("r")) ==
- to!S("qe55o wo5d"));
- assert(translate(to!S("hello world"),
- cast(dchar[dchar])['h' : 'q', 'l' : '5'],
- to!T("helo")) ==
- to!S(" wrd"));
- assert(translate(to!S("hello world"),
- cast(dchar[dchar])['h' : 'q', 'l' : '5'],
- to!T("q5")) ==
- to!S("qe55o wor5d"));
- assert(translate(to!S("hello \U00010143 world"),
- cast(dchar[dchar])['o' : '0', '\U00010143' : 'o'],
- to!T("\U00010143 ")) ==
- to!S("hell0w0rld"));
- }
- auto s = to!S("hello world");
- dchar[dchar] transTable = ['h' : 'q', 'l' : '5'];
- static assert(is(typeof(s) == typeof(translate(s, transTable))));
- }
- });
- }
- /++ Ditto +/
- C1[] translate(C1, S, C2 = immutable char)(C1[] str,
-                                            S[dchar] transTable,
-                                            const(C2)[] toRemove = null) @safe pure
-     if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2)
- {
-     // Same as the dchar-valued overload, but each replacement is a whole string
-     auto sink = appender!(C1[])();
-     translateImpl(str, transTable, toRemove, sink);
-     return sink.data;
- }
- /* @safe */ pure unittest
- {
- // string[dchar] tables (including empty replacements) over every input
- // string type and every toRemove string type, plus a result-type check.
- assertCTFEable!(
- {
- foreach (S; TypeTuple!( char[], const( char)[], immutable( char)[],
- wchar[], const(wchar)[], immutable(wchar)[],
- dchar[], const(dchar)[], immutable(dchar)[]))
- {
- assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"]) ==
- to!S("yellowe4242o wor42d"));
- assert(translate(to!S("hello world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
- to!S("he\U00010143\U00010143\U00010143\U00010143owl wowlr\U00010143\U00010143d"));
- assert(translate(to!S("hello \U00010143 world"), ['h' : "yellow", 'l' : "42"]) ==
- to!S("yellowe4242o \U00010143 wor42d"));
- assert(translate(to!S("hello \U00010143 world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
- to!S("he\U00010143\U00010143\U00010143\U00010143owl \U00010143 wowlr\U00010143\U00010143d"));
- assert(translate(to!S("hello \U00010143 world"), ['h' : ""]) ==
- to!S("ello \U00010143 world"));
- assert(translate(to!S("hello \U00010143 world"), ['\U00010143' : ""]) ==
- to!S("hello world"));
- assert(translate(to!S("hello world"), cast(string[dchar])null) == to!S("hello world"));
- foreach (T; TypeTuple!( char[], const( char)[], immutable( char)[],
- wchar[], const(wchar)[], immutable(wchar)[],
- dchar[], const(dchar)[], immutable(dchar)[]))
- {
- assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"], to!T("r")) ==
- to!S("yellowe4242o wo42d"));
- assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"], to!T("helo")) ==
- to!S(" wrd"));
- assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"], to!T("y42")) ==
- to!S("yellowe4242o wor42d"));
- assert(translate(to!S("hello \U00010143 world"),
- ['o' : "owl", '\U00010143' : "\n"],
- to!T("\U00010143 ")) ==
- to!S("hellowlwowlrld"));
- assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"], to!T("hello world")) ==
- to!S(""));
- assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"], to!T("42")) ==
- to!S("yellowe4242o wor42d"));
- }
- auto s = to!S("hello world");
- string[dchar] transTable = ['h' : "silly", 'l' : "putty"];
- static assert(is(typeof(s) == typeof(translate(s, transTable))));
- }
- });
- }
- /++
-     Overload of $(D translate) that writes its output to a caller-supplied
-     output range instead of returning a new array.
-     Params:
-     str = The original string.
-     transTable = The AA indicating which characters to replace and what to
-     replace them with.
-     toRemove = The characters to remove from the string.
-     buffer = An output range to write the contents to.
-  +/
- void translate(C1, C2 = immutable char, Buffer)(C1[] str,
-                                                 dchar[dchar] transTable,
-                                                 const(C2)[] toRemove,
-                                                 Buffer buffer)
-     if (isSomeChar!C1 && isSomeChar!C2 && isOutputRange!(Buffer, C1))
- {
-     // All of the work is done by the shared implementation
-     translateImpl(str, transTable, toRemove, buffer);
- }
- ///
- @safe pure unittest
- {
- dchar[dchar] transTable1 = ['e' : '5', 'o' : '7', '5': 'q'];
- auto buffer = appender!(dchar[])(); // output range that collects the translated text
- translate("hello world", transTable1, null, buffer);
- assert(buffer.data == "h5ll7 w7rld"); // the '5' produced from 'e' is not translated again to 'q'
- buffer.clear(); // empty the appender before the next case
- translate("hello world", transTable1, "low", buffer);
- assert(buffer.data == "h5 rd"); // 'l', 'o' and 'w' are removed; 'e' is still replaced
- buffer.clear();
- string[dchar] transTable2 = ['e' : "5", 'o' : "orange"]; // string-valued table: one character may expand to several
- translate("hello world", transTable2, null, buffer);
- assert(buffer.data == "h5llorange worangerld");
- }
- /++ Ditto +/
- void translate(C1, S, C2 = immutable char, Buffer)(C1[] str,
- S[dchar] transTable, // values are whole strings, so a match may expand to several characters
- const(C2)[] toRemove,
- Buffer buffer)
- if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2 && isOutputRange!(Buffer, S))
- {
- translateImpl(str, transTable, toRemove, buffer); // same implementation as the dchar[dchar] overload
- }
- // Shared worker for the AA-based, buffer-taking overloads of translate.
- // Walks str one code point at a time: code points found in toRemove are
- // skipped, code points with an entry in transTable are written as that
- // entry, and everything else is written to buffer unchanged.
- private void translateImpl(C1, T, C2, Buffer)(C1[] str,
-                                               T transTable,
-                                               const(C2)[] toRemove,
-                                               Buffer buffer)
- {
-     // Lookup set of the code points that must be dropped.
-     bool[dchar] dropped;
-     foreach (dchar ch; toRemove)
-         dropped[ch] = true;
-
-     foreach (dchar ch; str)
-     {
-         if (ch in dropped)
-             continue;
-
-         if (auto replacement = ch in transTable)
-             put(buffer, *replacement);
-         else
-             put(buffer, ch);
-     }
- }
- /++
-     $(I $(RED ASCII-only)) overload of $(LREF _translate). It will $(I not)
-     work with Unicode; it exists as an optimization for the cases where
-     Unicode processing is not necessary.
-
-     Unlike the other overloads of $(LREF _translate), this one does not take
-     an AA. Rather, it takes a $(D string) generated by $(LREF makeTrans).
-
-     The array generated by $(D makeTrans) is $(D 256) elements long such that
-     the index is equal to the ASCII character being replaced and the value is
-     equal to the character that it's being replaced with. Note that translate
-     does not decode any of the characters, so you can actually pass it Extended
-     ASCII characters if you want to (ASCII only actually uses $(D 128)
-     characters), but be warned that Extended ASCII characters are not valid
-     Unicode and therefore will result in a $(D UTFException) being thrown from
-     most other Phobos functions.
-
-     Also, because no decoding occurs, it is possible to use this overload to
-     translate ASCII characters within a proper UTF-8 string without altering the
-     other, non-ASCII characters. UTF validation issues only arise when a code
-     unit greater than $(D 127) is replaced with another code unit, or when a
-     code unit is replaced with a code unit greater than $(D 127).
-
-     See_Also:
-         $(LREF tr)
-         $(XREF array, replace)
-
-     Params:
-         str = The original string.
-         transTable = The string indicating which characters to replace and what
-                      to replace them with. It is generated by $(LREF makeTrans).
-         toRemove = The characters to remove from the string.
- +/
- C[] translate(C = immutable char)(in char[] str, in char[] transTable, in char[] toRemove = null) @trusted pure nothrow
-     if (is(Unqual!C == char))
- in
- {
-     assert(transTable.length == 256);
- }
- body
- {
-     // One removal flag per possible code unit value.
-     bool[256] dropFlags = false;
-     foreach (char unit; toRemove)
-         dropFlags[unit] = true;
-
-     // First pass: size the result exactly, so only one allocation is needed.
-     size_t kept = 0;
-     foreach (char unit; str)
-     {
-         if (dropFlags[unit])
-             continue;
-         ++kept;
-     }
-
-     // Second pass: fill the freshly allocated array.
-     auto result = new char[kept];
-     translateImplAscii(str, transTable, dropFlags, result, toRemove);
-     return cast(C[])(result);
- }
- /++ Ditto +/
- string makeTrans(in char[] from, in char[] to) @trusted pure nothrow
- in
- {
-     // Both lists must pair up one-to-one and contain ASCII only.
-     assert(from.length == to.length);
-     assert(from.length <= 256);
-     foreach (char c; from)
-         assert(std.ascii.isASCII(c));
-     foreach (char c; to)
-         assert(std.ascii.isASCII(c));
- }
- body
- {
-     // Start from the identity mapping: every code unit maps to itself.
-     auto table = new char[256];
-     foreach (idx, ref slot; table)
-         slot = cast(char)idx;
-
-     // Overwrite the entries named in `from` with their partners in `to`.
-     foreach (idx, src; from)
-         table[src] = to[idx];
-
-     // `table` is never shared, so handing it out as immutable is sound.
-     return assumeUnique(table);
- }
- ///
- @safe pure nothrow unittest
- {
- auto transTable1 = makeTrans("eo5", "57q"); // 'e' -> '5', 'o' -> '7', '5' -> 'q'
- assert(translate("hello world", transTable1) == "h5ll7 w7rld");
- assert(translate("hello world", transTable1, "low") == "h5 rd"); // 'l', 'o' and 'w' are removed
- }
- // Exercises the ASCII-only translate overload for every char qualifier, with
- // and without a removal list, at run time and in CTFE.
- @safe pure unittest
- {
-     assertCTFEable!(
-     {
-     foreach (C; TypeTuple!(char, const char, immutable char))
-     {
-         assert(translate!C("hello world", makeTrans("hl", "q5")) == to!(C[])("qe55o wor5d"));
-
-         // The explicit template argument selects the element type of the result.
-         auto s = to!(C[])("hello world");
-         auto transTable = makeTrans("hl", "q5");
-         static assert(is(typeof(s) == typeof(translate!C(s, transTable))));
-     }
-
-     foreach (S; TypeTuple!(char[], const(char)[], immutable(char)[]))
-     {
-         assert(translate(to!S("hello world"), makeTrans("hl", "q5")) == to!S("qe55o wor5d"));
-         assert(translate(to!S("hello \U00010143 world"), makeTrans("hl", "q5")) ==
-                to!S("qe55o \U00010143 wor5d"));
-         // 'o' -> '1' and 'l' -> 'o' in a single pass. This used to be written
-         // as assert(expr, msg), which never compared the result at all.
-         assert(translate(to!S("hello world"), makeTrans("ol", "1o")) == to!S("heoo1 w1rod"));
-         assert(translate(to!S("hello world"), makeTrans("", "")) == to!S("hello world"));
-         assert(translate(to!S("hello world"), makeTrans("12345", "67890")) == to!S("hello world"));
-         assert(translate(to!S("hello \U00010143 world"), makeTrans("12345", "67890")) ==
-                to!S("hello \U00010143 world"));
-
-         foreach (T; TypeTuple!(char[], const(char)[], immutable(char)[]))
-         {
-             assert(translate(to!S("hello world"), makeTrans("hl", "q5"), to!T("r")) ==
-                    to!S("qe55o wo5d"));
-             assert(translate(to!S("hello \U00010143 world"), makeTrans("hl", "q5"), to!T("r")) ==
-                    to!S("qe55o \U00010143 wo5d"));
-             assert(translate(to!S("hello world"), makeTrans("hl", "q5"), to!T("helo")) ==
-                    to!S(" wrd"));
-             assert(translate(to!S("hello world"), makeTrans("hl", "q5"), to!T("q5")) ==
-                    to!S("qe55o wor5d"));
-         }
-     }
-     });
- }
- /++
-     $(I $(RED ASCII-only)) overload of $(D translate) which writes its result
-     to an existing buffer rather than returning a new string.
-
-     Params:
-         str = The original string.
-         transTable = The string indicating which characters to replace and what
-                      to replace them with. It is generated by $(LREF makeTrans).
-         toRemove = The characters to remove from the string.
-         buffer = An output range to write the contents to.
- +/
- void translate(C = immutable char, Buffer)(in char[] str, in char[] transTable, in char[] toRemove, Buffer buffer)
-     if (is(Unqual!C == char) && isOutputRange!(Buffer, char))
- in
- {
-     assert(transTable.length == 256);
- }
- body
- {
-     // One removal flag per possible code unit value.
-     bool[256] dropFlags = false;
-     foreach (char unit; toRemove)
-         dropFlags[unit] = true;
-
-     translateImplAscii(str, transTable, dropFlags, buffer, toRemove);
- }
- ///
- @safe pure unittest
- {
- auto buffer = appender!(char[])(); // output range that receives the translated code units
- auto transTable1 = makeTrans("eo5", "57q");
- translate("hello world", transTable1, null, buffer);
- assert(buffer.data == "h5ll7 w7rld");
- buffer.clear(); // empty the appender before the next case
- translate("hello world", transTable1, "low", buffer);
- assert(buffer.data == "h5 rd"); // 'l', 'o' and 'w' are removed
- }
- // Shared worker for the ASCII-only translate overloads. For every code unit
- // of str whose remTable flag is not set, the transTable entry for that code
- // unit is emitted. If Buffer is an output range of char the units are put()
- // into it; otherwise buffer is filled by index, starting at 0.
- // The C and toRemove parameters are not used by the body.
- private void translateImplAscii(C = immutable char, Buffer)(in char[] str, in char[] transTable, ref bool[256] remTable, Buffer buffer, in char[] toRemove = null)
- {
-     static if (isOutputRange!(Buffer, char))
-     {
-         foreach (char unit; str)
-         {
-             if (remTable[unit])
-                 continue;
-             put(buffer, transTable[unit]);
-         }
-     }
-     else
-     {
-         // Indexable destination: keep our own write position.
-         size_t pos = 0;
-         foreach (char unit; str)
-         {
-             if (remTable[unit])
-                 continue;
-             buffer[pos] = transTable[unit];
-             ++pos;
-         }
-     }
- }
- /*****************************************************
-  * Format arguments into a string.
-  *
-  * Params: fmt  = Format string. For detailed specification, see $(XREF format,formattedWrite).
-  *         args = Variadic list of arguments to format into returned string.
-  *
-  * Throws: $(D FormatException) if not every argument in $(D args) was
-  *         consumed by $(D fmt).
-  *
-  *  $(RED format's current implementation was replaced with $(LREF xformat)'s
-  *      implementation in November 2012.
-  *      This is seamless for most code, but it makes it so that the only
-  *      argument that can be a format string is the first one, so any
-  *      code which used multiple format strings has broken. Please change
-  *      your calls to format accordingly.
-  *
-  *      e.g.:
-  *      ----
-  *      format("key = %s", key, ", value = %s", value)
-  *      ----
-  *      needs to be rewritten as:
-  *      ----
-  *      format("key = %s, value = %s", key, value)
-  *      ----
-  *  )
-  */
- string format(Char, Args...)(in Char[] fmt, Args args)
- {
-     auto sink = appender!string();
-     immutable consumed = formattedWrite(sink, fmt, args);
-
-     version (all)
-     {
-         // In the future, this check will be removed to increase consistency
-         // with formattedWrite
-         enforce(consumed == args.length, new FormatException(
-             text("Orphan format arguments: args[", consumed, "..", args.length, "]")));
-     }
-
-     return sink.data;
- }
- unittest // format: literal text, %% escape, argument substitution, argument-count errors
- {
- debug(string) printf("std.string.format.unittest\n");
- assertCTFEable!(
- {
- // assert(format(null) == "");
- assert(format("foo") == "foo");
- assert(format("foo%%") == "foo%");
- assert(format("foo%s", 'C') == "fooC");
- assert(format("%s foo", "bar") == "bar foo");
- assert(format("%s foo %s", "bar", "abc") == "bar foo abc");
- assert(format("foo %d", -123) == "foo -123");
- assert(format("foo %d", 123) == "foo 123");
- assertThrown!FormatException(format("foo %s")); // format spec without an argument
- assertThrown!FormatException(format("foo %s", 123, 456)); // argument without a format spec
- assert(format("hel%slo%s%s%s", "world", -138, 'c', true) ==
- "helworldlo-138ctrue");
- });
- }
- /*****************************************************
-  * Format arguments into buffer <i>buf</i> which must be large
-  * enough to hold the result. Throws RangeError if it is not.
-  * Returns: The slice of $(D buf) containing the formatted string.
-  *
-  * Throws: $(D FormatException) if not every argument in $(D args) was
-  *         consumed by $(D fmt).
-  *
-  *  $(RED sformat's current implementation was replaced with $(LREF xsformat)'s
-  *      implementation in November 2012.
-  *      This is seamless for most code, but it makes it so that the only
-  *      argument that can be a format string is the first one, so any
-  *      code which used multiple format strings has broken. Please change
-  *      your calls to sformat accordingly.
-  *
-  *      e.g.:
-  *      ----
-  *      sformat(buf, "key = %s", key, ", value = %s", value)
-  *      ----
-  *      needs to be rewritten as:
-  *      ----
-  *      sformat(buf, "key = %s, value = %s", key, value)
-  *      ----
-  *  )
-  */
- char[] sformat(Char, Args...)(char[] buf, in Char[] fmt, Args args)
- {
-     // Number of UTF-8 code units written to buf so far.
-     size_t used;
-
-     // Output range handed to formattedWrite; it writes straight into buf
-     // and reports a range error instead of growing it.
-     struct Sink
-     {
-         void put(dchar c)
-         {
-             char[4] utf8;
-             auto len = encode(utf8, c);
-
-             if (buf.length < used + len)
-                 onRangeError("std.string.sformat", 0);
-
-             buf[used .. used + len] = utf8[0 .. len];
-             used += len;
-         }
-
-         void put(const(char)[] s)
-         {
-             if (buf.length < used + s.length)
-                 onRangeError("std.string.sformat", 0);
-
-             buf[used .. used + s.length] = s[];
-             used += s.length;
-         }
-
-         // UTF-16 and UTF-32 input is decoded and re-encoded one code
-         // point at a time through put(dchar).
-         void put(const(wchar)[] s)
-         {
-             while (!s.empty)
-             {
-                 put(s.front);
-                 s.popFront();
-             }
-         }
-
-         void put(const(dchar)[] s)
-         {
-             while (!s.empty)
-             {
-                 put(s.front);
-                 s.popFront();
-             }
-         }
-     }
-
-     auto consumed = formattedWrite(Sink(), fmt, args);
-
-     version (all)
-     {
-         // In the future, this check will be removed to increase consistency
-         // with formattedWrite
-         enforce(consumed == args.length, new FormatException(
-             text("Orphan format arguments: args[", consumed, "..", args.length, "]")));
-     }
-
-     return buf[0 .. used];
- }
- unittest // sformat: same cases as format, written into a fixed 10-char buffer
- {
- debug(string) printf("std.string.sformat.unittest\n");
- assertCTFEable!(
- {
- char[10] buf;
- assert(sformat(buf[], "foo") == "foo");
- assert(sformat(buf[], "foo%%") == "foo%");
- assert(sformat(buf[], "foo%s", 'C') == "fooC");
- assert(sformat(buf[], "%s foo", "bar") == "bar foo");
- assertThrown!RangeError(sformat(buf[], "%s foo %s", "bar", "abc")); // "bar foo abc" needs 11 chars, buf has 10
- assert(sformat(buf[], "foo %d", -123) == "foo -123");
- assert(sformat(buf[], "foo %d", 123) == "foo 123");
- assertThrown!FormatException(sformat(buf[], "foo %s")); // format spec without an argument
- assertThrown!FormatException(sformat(buf[], "foo %s", 123, 456)); // argument without a format spec
- assert(sformat(buf[], "%s %s %s", "c"c, "w"w, "d"d) == "c w d"); // char, wchar and dchar strings all accepted
- });
- }
- // Explicitly undocumented. It will be removed in July 2014.
- deprecated("Please use std.string.format instead.") alias xformat = format; // old name, now just an alias of format
- deprecated unittest // marked deprecated so that using the deprecated alias here does not warn
- {
- debug(string) printf("std.string.xformat.unittest\n");
- assertCTFEable!(
- {
- // assert(xformat(null) == "");
- assert(xformat("foo") == "foo");
- assert(xformat("foo%%") == "foo%");
- assert(xformat("foo%s", 'C') == "fooC");
- assert(xformat("%s foo", "bar") == "bar foo");
- assert(xformat("%s foo %s", "bar", "abc") == "bar foo abc");
- assert(xformat("foo %d", -123) == "foo -123");
- assert(xformat("foo %d", 123) == "foo 123");
- assertThrown!FormatException(xformat("foo %s"));
- assertThrown!FormatException(xformat("foo %s", 123, 456));
- });
- }
- // Explicitly undocumented. It will be removed in July 2014.
- deprecated("Please use std.string.sformat instead.") alias xsformat = sformat; // old name, now just an alias of sformat
- deprecated unittest // marked deprecated so that using the deprecated alias here does not warn
- {
- debug(string) printf("std.string.xsformat.unittest\n");
- assertCTFEable!(
- {
- char[10] buf;
- assert(xsformat(buf[], "foo") == "foo");
- assert(xsformat(buf[], "foo%%") == "foo%");
- assert(xsformat(buf[], "foo%s", 'C') == "fooC");
- assert(xsformat(buf[], "%s foo", "bar") == "bar foo");
- assertThrown!RangeError(xsformat(buf[], "%s foo %s", "bar", "abc")); // result does not fit into the 10-char buffer
- assert(xsformat(buf[], "foo %d", -123) == "foo -123");
- assert(xsformat(buf[], "foo %d", 123) == "foo 123");
- assertThrown!FormatException(xsformat(buf[], "foo %s"));
- assertThrown!FormatException(xsformat(buf[], "foo %s", 123, 456));
- assert(xsformat(buf[], "%s %s %s", "c"c, "w"w, "d"d) == "c w d");
- });
- }
- /***********************************************
- * See if character c is in the pattern.
- * Patterns:
- *
- * A <i>pattern</i> is an array of characters much like a <i>character
- * class</i> in regular expressions. A sequence of characters
- * can be given, such as "abcde". The '-' can represent a range
- * of characters, as "a-e" represents the same pattern as "abcde".
- * "a-fA-F0-9" represents all the hex characters.
- * If the first character of a pattern is '^', then the pattern
- * is negated, i.e. "^0-9" means any character except a digit.
- * A pattern consisting of only "^" matches the '^' character itself.
- * A '-' that is the first character (after an optional leading '^') or
- * the last character of the pattern is taken literally.
- * The functions inPattern, <b>countchars</b>, <b>removechars</b>,
- * and <b>squeeze</b>
- * use patterns.
- *
- * Note: In the future, the pattern syntax may be improved
- * to be more like regular expression character classes.
- */
- bool inPattern(S)(dchar c, in S pattern) @safe pure if (isSomeString!S)
- {
- bool result = false; // true once a leading '^' has been seen, i.e. the pattern is negated
- int range = 0; // non-zero while the previous pattern character was a range '-'
- dchar lastc; // previous pattern character; lower bound when a range follows
- foreach (size_t i, dchar p; pattern)
- {
- if (p == '^' && i == 0)
- {
- result = true;
- if (i + 1 == pattern.length)
- return (c == p); // or should this be an error?
- }
- else if (range)
- {
- range = 0;
- if (lastc <= c && c <= p || c == p) // p is the upper bound of the range lastc-p
- return !result;
- }
- else if (p == '-' && i > result && i + 1 < pattern.length) // bool result is promoted: index must be past the '^', if any
- {
- range = 1;
- continue; // keep lastc: it is the lower bound of the range
- }
- else if (c == p)
- return !result;
- lastc = p;
- }
- return result; // nothing matched: false for a normal pattern, true for a negated one
- }
- unittest // inPattern: literals, negation with '^', ranges, and literal '-' / '^' corner cases
- {
- debug(string) printf("std.string.inPattern.unittest\n");
- assertCTFEable!(
- {
- assert(inPattern('x', "x") == 1);
- assert(inPattern('x', "y") == 0);
- assert(inPattern('x', string.init) == 0);
- assert(inPattern('x', "^y") == 1);
- assert(inPattern('x', "yxxy") == 1);
- assert(inPattern('x', "^yxxy") == 0);
- assert(inPattern('x', "^abcd") == 1);
- assert(inPattern('^', "^^") == 0); // negated pattern containing '^'
- assert(inPattern('^', "^") == 1); // a lone '^' matches itself
- assert(inPattern('^', "a^") == 1); // '^' is only special at index 0
- assert(inPattern('x', "a-z") == 1);
- assert(inPattern('x', "A-Z") == 0);
- assert(inPattern('x', "^a-z") == 0);
- assert(inPattern('x', "^A-Z") == 1);
- assert(inPattern('-', "a-") == 1); // trailing '-' is a literal
- assert(inPattern('-', "^A-") == 0);
- assert(inPattern('a', "z-a") == 1); // reversed range: only its two end points match
- assert(inPattern('z', "z-a") == 1);
- assert(inPattern('x', "z-a") == 0);
- });
- }
- /***********************************************
-  * See if character c is in the intersection of the patterns, i.e.
-  * whether c matches every pattern in $(D patterns).
-  *
-  * Params:
-  *      c        = The character to test.
-  *      patterns = The patterns that must all match; each one uses the
-  *                 syntax accepted by the single-pattern overload.
-  *
-  * Returns: $(D false) as soon as one pattern does not match, otherwise
-  *          $(D true) (which includes the case of an empty array).
-  */
- bool inPattern(S)(dchar c, S[] patterns) @safe pure if (isSomeString!S)
- {
-     // Let the element type be inferred: spelling it as `string` made the
-     // template fail to instantiate for wstring[] and dstring[] arguments,
-     // even though the constraint accepts them.
-     foreach (pattern; patterns)
-     {
-         if (!inPattern(c, pattern))
-             return false;
-     }
-     return true;
- }
- /********************************************
-  * Count characters in s that match pattern.
-  *
-  * Params:
-  *      s       = The string whose characters are examined.
-  *      pattern = The pattern, in the syntax understood by inPattern.
-  *
-  * Returns: The number of characters of $(D s) for which inPattern is true.
-  */
- size_t countchars(S, S1)(S s, in S1 pattern) @safe pure if (isSomeString!S && isSomeString!S1)
- {
-     size_t matches;
-     foreach (dchar ch; s)
-     {
-         if (inPattern(ch, pattern))
-             ++matches;
-     }
-     return matches;
- }
- // Checks countchars with a range pattern and with a plain character list.
- unittest
- {
-     // The trace names the function under test; it used to say "count",
-     // which is a different function.
-     debug(string) printf("std.string.countchars.unittest\n");
-
-     assertCTFEable!(
-     {
-     assert(countchars("abc", "a-c") == 3);
-     assert(countchars("hello world", "or") == 3);
-     });
- }
- /********************************************
-  * Return string that is s with all characters removed that match pattern.
-  *
-  * No copy is made when nothing matches: $(D s) itself is returned.
-  */
- S removechars(S)(S s, in S pattern) @safe pure if (isSomeString!S)
- {
-     Unqual!(typeof(s[0]))[] kept;
-     bool copying = false;   // set at the first character that gets removed
-
-     foreach (size_t idx, dchar ch; s)
-     {
-         if (!inPattern(ch, pattern))
-         {
-             // Surviving character: it only needs to be appended once a
-             // copy exists, i.e. after the first removal.
-             if (copying)
-                 std.utf.encode(kept, ch);
-             continue;
-         }
-
-         // Removed character: on the first one, start the copy with
-         // everything that preceded it.
-         if (!copying)
-         {
-             copying = true;
-             kept = s[0 .. idx].dup;
-         }
-     }
-
-     if (!copying)
-         return s;
-     return kept;
- }
- unittest // removechars: range pattern, character list, and matches at both ends of the string
- {
- debug(string) printf("std.string.removechars.unittest\n");
- assertCTFEable!(
- {
- assert(removechars("abc", "a-c").length == 0); // every character matches
- assert(removechars("hello world", "or") == "hell wld");
- assert(removechars("hello world", "d") == "hello worl");
- assert(removechars("hah", "h") == "a"); // first and last character removed
- });
- }
- /***************************************************
- * Return string where sequences of a character in s[] from pattern[]
- * are replaced with a single instance of that character.
- * If pattern is null, it defaults to all characters.
- * If nothing is squeezed, s itself is returned; if only the tail of s
- * is affected, a slice of s is returned instead of a copy.
- */
- S squeeze(S)(S s, in S pattern = null)
- {
- Unqual!(typeof(s[0]))[] r; // result copy, created lazily once it is needed
- dchar lastc; // character of the current run
- size_t lasti; // end index of the prefix of s known to be unchanged
- int run; // non-zero while inside a run of a pattern character
- bool changed; // true once at least one repeated character was dropped
- foreach (size_t i, dchar c; s)
- {
- if (run && lastc == c)
- {
- changed = true; // repeat of the run character: dropped (never appended)
- }
- else if (pattern is null || inPattern(c, pattern))
- {
- run = 1; // c starts a new run
- if (changed)
- {
- if (r is null)
- r = s[0 .. lasti].dup; // first append after a drop: copy the unchanged prefix
- std.utf.encode(r, c);
- }
- else
- lasti = i + std.utf.stride(s, i); // nothing dropped yet: prefix now extends past c
- lastc = c;
- }
- else
- {
- run = 0; // c is not in the pattern and ends any run
- if (changed)
- {
- if (r is null)
- r = s[0 .. lasti].dup;
- std.utf.encode(r, c);
- }
- }
- }
- return changed ? ((r is null) ? s[0 .. lasti] : cast(S) r) : s; // r is null when only trailing repeats were dropped
- }
- unittest // squeeze: default pattern, explicit pattern, and the no-copy return paths
- {
- debug(string) printf("std.string.squeeze.unittest\n");
- assertCTFEable!(
- {
- string s;
- assert(squeeze("hello") == "helo");
- s = "abcd";
- assert(squeeze(s) is s); // nothing to squeeze: the input itself is returned
- s = "xyzz";
- assert(squeeze(s).ptr == s.ptr); // should just be a slice
- assert(squeeze("hello goodbyee", "oe") == "hello godbye"); // "ll" is kept: 'l' is not in the pattern
- });
- }
- /***************************************************************
-     Finds the position $(D_PARAM pos) of the first character in $(D_PARAM
-     s) that does not match $(D_PARAM pattern) (in the terminology used by
-     $(LINK2 std_string.html,inPattern)). Updates $(D_PARAM s =
-     s[pos..$]). Returns the slice from the beginning of the original
-     (before update) string up to, and excluding, $(D_PARAM pos).
-
-     If every character of $(D_PARAM s) matches, the whole string is
-     returned and $(D_PARAM s) becomes empty.
-
-     Example:
-     ---
-     string s = "123abc";
-     string t = munch(s, "0123456789");
-     assert(t == "123" && s == "abc");
-     t = munch(s, "0123456789");
-     assert(t == "" && s == "abc");
-     ---
-
-     The $(D_PARAM munch) function is mostly convenient for skipping
-     certain category of characters (e.g. whitespace) when parsing
-     strings. (In such cases, the return value is not used.)
-  */
- S1 munch(S1, S2)(ref S1 s, S2 pattern)
- {
-     // Index of the first non-matching character; s.length if there is none.
-     size_t cut = s.length;
-     foreach (idx, dchar ch; s)
-     {
-         if (inPattern(ch, pattern))
-             continue;
-         cut = idx;
-         break;
-     }
-
-     // Take the matching prefix first, then advance the caller's string.
-     auto head = s[0 .. cut];
-     s = s[cut .. $];
-     return head;
- }
- @safe pure unittest // munch with multi-byte characters in both the string and the pattern
- {
- string s = "123€abc";
- string t = munch(s, "0123456789");
- assert(t == "123" && s == "€abc");
- t = munch(s, "0123456789"); // nothing matches now: empty result, s unchanged
- assert(t == "" && s == "€abc");
- t = munch(s, "£$€¥");
- assert(t == "€" && s == "abc");
- }
- /**********************************************
- * Return string that is the 'successor' to s[].
- * If the rightmost character is a-zA-Z0-9, it is incremented within
- * its case or digits. If it generates a carry, the process is
- * repeated with the one to its immediate left.
- * If the rightmost character is not a-zA-Z0-9 (or s is empty), s is
- * returned unchanged. A carry out of the first character makes the
- * result one character longer.
- */
- S succ(S)(S s) @safe pure if (isSomeString!S)
- {
- if (s.length && std.ascii.isAlphaNum(s[$ - 1]))
- {
- auto r = s.dup; // working copy that receives the incremented characters
- size_t i = r.length - 1; // position currently being incremented, moving right to left
- while (1)
- {
- dchar c = s[i];
- dchar carry; // character to prepend if the carry runs off the front
- switch (c)
- {
- case '9':
- c = '0';
- carry = '1';
- goto Lcarry;
- case 'z':
- case 'Z':
- c -= 'Z' - 'A'; // wrap around to 'a' or 'A' respectively
- carry = c;
- Lcarry:
- r[i] = cast(char)c;
- if (i == 0)
- {
- auto t = new typeof(r[0])[r.length + 1]; // carry out of the first character: grow by one
- t[0] = cast(char) carry;
- t[1 .. $] = r[];
- return t;
- }
- i--; // propagate the carry to the character on the left
- break;
- default:
- if (std.ascii.isAlphaNum(c)) // a non-alphanumeric character absorbs the carry unchanged
- r[i]++;
- return r;
- }
- }
- }
- return s;
- }
- unittest // succ: empty and non-alphanumeric input, simple increment, carry propagation
- {
- debug(string) printf("std.string.succ.unittest\n");
- assertCTFEable!(
- {
- assert(succ(string.init) is null);
- assert(succ("!@#$%") == "!@#$%"); // last character is not alphanumeric: returned unchanged
- assert(succ("1") == "2");
- assert(succ("9") == "10"); // carry out of the first character lengthens the result
- assert(succ("999") == "1000");
- assert(succ("zz99") == "aaa00"); // carry crosses from the digits into the letters
- });
- }
- /++
- Replaces the characters in $(D str) which are in $(D from) with the
- corresponding characters in $(D to) and returns the resulting string.
- $(D tr) is based on
- $(WEB pubs.opengroup.org/onlinepubs/9699919799/utilities/_tr.html, Posix's tr),
- though it doesn't do everything that the Posix utility does.
- Params:
- str = The original string.
- from = The characters to replace.
- to = The characters to replace with.
- modifiers = String containing modifiers.
- Modifiers:
- $(BOOKTABLE,
- $(TR $(TD Modifier) $(TD Description))
- $(TR $(TD $(D 'c')) $(TD Complement the list of characters in $(D from)))
- $(TR $(TD $(D 'd')) $(TD Removes matching characters with no corresponding
- replacement in $(D to)))
- $(TR $(TD $(D 's')) $(TD Removes adjacent duplicates in the replaced
- characters))
- )
- If the modifier $(D 'd') is present, then the number of characters in
- $(D to) may be only $(D 0) or $(D 1).
- If the modifier $(D 'd') is $(I not) present, and $(D to) is empty, then
- $(D to) is taken to be the same as $(D from).
- If the modifier $(D 'd') is $(I not) present, and $(D to) is shorter than
- $(D from), then $(D to) is extended by replicating the last character in
- $(D to).
- Both $(D from) and $(D to) may contain ranges using the $(D '-') character
- (e.g. $(D "a-d") is synonymous with $(D "abcd").) Neither accept a leading
- $(D '^') as meaning the complement of the string (use the $(D 'c') modifier
- for that).
- +/
- C1[] tr(C1, C2, C3, C4 = immutable char)
- (C1[] str, const(C2)[] from, const(C3)[] to, const(C4)[] modifiers = null)
- {
- bool mod_c;
- bool mod_d;
- bool mod_s;
- foreach (char c; modifiers)
- {
- switch (c)
- {
- case 'c': mod_c = 1; break; // complement
- case 'd': mod_d = 1; break; // delete unreplaced chars
- case 's': mod_s = 1; break; // squeeze duplicated replaced chars
- default: assert(0); // any other modifier character is rejected
- }
- }
- if (to.empty && !mod_d)
- to = std.conv.to!(typeof(to))(from); // empty `to` without 'd': characters map to themselves
- auto result = appender!(C1[])();
- bool modified; // whether the previously emitted character was a replacement
- dchar lastc; // previously emitted character, used by the 's' modifier
- foreach (dchar c; str)
- {
- dchar lastf; // previous character of `from`; dchar.init right after a range
- dchar lastt; // previous character of `to`; dchar.init right after a range
- dchar newc;
- int n = 0; // running position within `from`; selects the replacement in `to` below
- for (size_t i = 0; i < from.length; )
- {
- dchar f = std.utf.decode(from, i);
- if (f == '-' && lastf != dchar.init && i < from.length) // '-' between two characters denotes a range
- {
- dchar nextf = std.utf.decode(from, i);
- if (lastf <= c && c <= nextf)
- {
- n += c - lastf - 1;
- if (mod_c)
- goto Lnotfound;
- goto Lfound;
- }
- n += nextf - lastf;
- lastf = lastf.init;
- continue;
- }
- if (c == f)
- { if (mod_c)
- goto Lnotfound;
- goto Lfound;
- }
- lastf = f;
- n++;
- }
- if (!mod_c)
- goto Lnotfound;
- n = 0; // consider it 'found' at position 0
- Lfound:
- // Find the nth character in to[]
- dchar nextt;
- for (size_t i = 0; i < to.length; )
- { dchar t = std.utf.decode(to, i);
- if (t == '-' && lastt != dchar.init && i < to.length) // range inside `to`
- {
- nextt = std.utf.decode(to, i);
- n -= nextt - lastt;
- if (n < 0)
- {
- newc = nextt + n + 1;
- goto Lnewc;
- }
- lastt = dchar.init;
- continue;
- }
- if (n == 0)
- { newc = t;
- goto Lnewc;
- }
- lastt = t;
- nextt = t;
- n--;
- }
- if (mod_d)
- continue; // 'd': no replacement available, so the character is deleted
- newc = nextt; // `to` exhausted: reuse the last character seen in `to`
- Lnewc:
- if (mod_s && modified && newc == lastc)
- continue; // 's': drop a replacement that repeats the previous replacement
- result.put(newc);
- assert(newc != dchar.init);
- modified = true;
- lastc = newc;
- continue;
- Lnotfound:
- result.put(c); // not selected by `from`: copied through unchanged
- lastc = c;
- modified = false;
- }
- return result.data;
- }
- unittest // tr: plain lists, ranges, short `to`, and the 'c', 'd' and 's' modifiers
- {
- debug(string) printf("std.string.tr.unittest\n");
- import std.algorithm;
- // Complete list of test types; too slow to test'em all
- // alias TestTypes = TypeTuple!(
- // char[], const( char)[], immutable( char)[],
- // wchar[], const(wchar)[], immutable(wchar)[],
- // dchar[], const(dchar)[], immutable(dchar)[]);
- // Reduced list of test types
- alias TestTypes = TypeTuple!(char[], const(wchar)[], immutable(dchar)[]);
- assertCTFEable!(
- {
- foreach (S; TestTypes)
- {
- foreach (T; TestTypes)
- {
- foreach (U; TestTypes)
- {
- assert(equal(tr(to!S("abcdef"), to!T("cd"), to!U("CD")), "abCDef"));
- assert(equal(tr(to!S("abcdef"), to!T("b-d"), to!U("B-D")), "aBCDef"));
- assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-Dx")), "aBCDefgx"));
- assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-CDx")), "aBCDefgx"));
- assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-BCDx")), "aBCDefgx"));
- assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U("*"), to!S("c")), "****ef")); // 'c': everything except 'e' and 'f' is replaced
- assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U(""), to!T("d")), "abcd")); // 'd': 'e' and 'f' are deleted
- assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U(""), to!U("s")), "helo godbye")); // 's' with empty `to`: squeeze only
- assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U("x"), "s"), "hex gxdbye"));
- assert(equal(tr(to!S("14-Jul-87"), to!T("a-zA-Z"), to!U(" "), "cs"), " Jul "));
- assert(equal(tr(to!S("Abc"), to!T("AAA"), to!U("XYZ")), "Xbc")); // first occurrence in `from` wins
- }
- }
- auto s = to!S("hello world");
- static assert(is(typeof(s) == typeof(tr(s, "he", "if")))); // result type follows the type of str
- }
- });
- }
- /* ************************************************
- * Version : v0.3
- * Author : David L. 'SpottedTiger' Davis
- * Date Created : 31.May.05 Compiled and Tested with dmd v0.125
- * Date Modified : 01.Jun.05 Modified the function to handle the
- * : imaginary and complex float-point
- * : datatypes.
- * :
- * Licence : Public Domain / Contributed to Digital Mars
- */
- /**
- * Checks whether the characters of s form one of the numeric formats
- * described below; returns true if they do and false otherwise
- * (including for an empty string).
- *
- * [in] string s can be formatted in the following ways:
- *
- * Integer Whole Number:
- * (for byte, ubyte, short, ushort, int, uint, long, and ulong)
- * ['+'|'-']digit(s)[U|L|UL]
- *
- * examples: 123, 123UL, 123L, +123U, -123L
- *
- * Floating-Point Number:
- * (for float, double, real, ifloat, idouble, and ireal)
- * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
- * or [nan|nani|inf|-inf]
- *
- * examples: +123., -123.01, 123.3e-10f, 123.3e-10fi, 123.3e-10L
- *
- * (for cfloat, cdouble, and creal)
- * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][+]
- * [digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
- * or [nan|nani|nan+nani|inf|-inf]
- *
- * examples: nan, -123e-1+456.9e-10Li, +123e+10+456i, 123+456
- *
- * [in] bool bAllowSep
- * False by default, but when set to true it will accept the
- * separator characters "," and "_" within the string, but these
- * characters should be stripped from the string before using any
- * of the conversion functions like toInt(), toFloat(), and etc
- * else an error will occur.
- *
- * Also please note, that no spaces are allowed within the string
- * anywhere whether it's a leading, trailing, or embedded space(s),
- * thus they too must be stripped from the string before using this
- * function, or any of the conversion functions.
- */
- bool isNumeric(const(char)[] s, in bool bAllowSep = false) @safe pure
- {
- immutable iLen = s.length;
- if (iLen == 0)
- return false;
- // Check for NaN (Not a Number) and for Infinity
- if (s.among!((a, b) => icmp(a, b) == 0)
- ("nan", "nani", "nan+nani", "inf", "-inf")) // compared case-insensitively
- return true;
- immutable j = s[0].among!('-', '+') != 0; // true when there is a leading sign to skip
- bool bDecimalPoint, bExponent, bComplex, sawDigits;
- for (size_t i = j; i < iLen; i++) // bool j converts to 0 or 1: start after the sign, if any
- {
- immutable c = s[i];
- // Digits are good, continue checking
- // with the next character... ;)
- if (c >= '0' && c <= '9')
- {
- sawDigits = true;
- continue;
- }
- // Check for the complex type, and if found
- // reset the flags for checking the 2nd number.
- if (c == '+')
- {
- if (!i)
- return false;
- bDecimalPoint = false;
- bExponent = false;
- bComplex = true;
- sawDigits = false;
- continue;
- }
- // Allow only one exponent per number
- if (c.among!('e', 'E'))
- {
- // A 2nd exponent found, return not a number
- if (bExponent || i + 1 >= iLen)
- return false;
- // Look forward for the sign, and if
- // missing then this is not a number.
- if (!s[i + 1].among!('-', '+'))
- return false;
- bExponent = true;
- i++; // skip the exponent's sign, which was just checked
- continue;
- }
- // Allow only one decimal point per number to be used
- if (c == '.' )
- {
- // A 2nd decimal point found, return not a number
- if (bDecimalPoint)
- return false;
- bDecimalPoint = true;
- continue;
- }
- // Check for ending literal characters: "f,u,l,i,ul,fi,li",
- // and whether they're being used with the correct datatype.
- if (i == iLen - 2) // a non-digit in the second-to-last position: must start a 2-character suffix
- {
- if (!sawDigits)
- return false;
- // Integer Whole Number
- if (icmp(s[i..iLen], "ul") == 0 &&
- (!bDecimalPoint && !bExponent && !bComplex))
- return true;
- // Floating-Point Number
- if (s[i..iLen].among!((a, b) => icmp(a, b) == 0)("fi", "li") &&
- (bDecimalPoint || bExponent || bComplex))
- return true;
- if (icmp(s[i..iLen], "ul") == 0 &&
- (bDecimalPoint || bExponent || bComplex))
- return false;
- // Could be a Integer or a Float, thus
- // all these suffixes are valid for both
- return s[i..iLen].among!((a, b) => icmp(a, b) == 0)
- ("ul", "fi", "li") != 0;
- }
- if (i == iLen - 1) // a non-digit in the last position: must be a 1-character suffix
- {
- if (!sawDigits)
- return false;
- // Integer Whole Number
- if (c.among!('u', 'l', 'U', 'L') &&
- (!bDecimalPoint && !bExponent && !bComplex))
- return true;
- // Check to see if the last character in the string
- // is the required 'i' character
- if (bComplex)
- return c.among!('i', 'I') != 0;
- // Floating-Point Number
- return c.among!('l', 'L', 'f', 'F', 'i', 'I') != 0;
- }
- // Check if separators are allowed to be in the numeric string
- if (!bAllowSep || !c.among!('_', ','))
- return false;
- }
- return sawDigits; // all characters consumed: numeric only if the last number had digits
- }
- unittest // isNumeric must reject suffixes, signs, points and exponents that have no digits
- {
- assert(!isNumeric("F"));
- assert(!isNumeric("L"));
- assert(!isNumeric("U"));
- assert(!isNumeric("i"));
- assert(!isNumeric("fi"));
- assert(!isNumeric("ul"));
- assert(!isNumeric("li"));
- assert(!isNumeric("."));
- assert(!isNumeric("-"));
- assert(!isNumeric("+"));
- assert(!isNumeric("e-"));
- assert(!isNumeric("e+"));
- assert(!isNumeric(".f"));
- assert(!isNumeric("e+f"));
- }
- unittest // isNumeric: integer, floating-point and complex forms, suffixes, and malformed input
- {
- debug(string) printf("isNumeric(in string, bool = false).unittest\n");
- assertCTFEable!(
- {
- // Test the isNumeric(in string) function
- assert(isNumeric("1") == true );
- assert(isNumeric("1.0") == true );
- assert(isNumeric("1e-1") == true );
- assert(isNumeric("12345xxxx890") == false );
- assert(isNumeric("567L") == true );
- assert(isNumeric("23UL") == true );
- assert(isNumeric("-123..56f") == false );
- assert(isNumeric("12.3.5.6") == false );
- assert(isNumeric(" 12.356") == false );
- assert(isNumeric("123 5.6") == false );
- assert(isNumeric("1233E-1+1.0e-1i") == true );
- assert(isNumeric("123.00E-5+1234.45E-12Li") == true);
- assert(isNumeric("123.00e-5+1234.45E-12iL") == false);
- assert(isNumeric("123.00e-5+1234.45e-12uL") == false);
- assert(isNumeric("123.00E-5+1234.45e-12lu") == false);
- assert(isNumeric("123fi") == true);
- assert(isNumeric("123li") == true);
- assert(isNumeric("--123L") == false);
- assert(isNumeric("+123.5UL") == false);
- assert(isNumeric("123f") == true);
- assert(isNumeric("123.u") == false);
- // @@@BUG@@ to!string(float) is not CTFEable.
- // Related: formatValue(T) if (is(FloatingPointTypeOf!T))
- if (!__ctfe)
- {
- assert(isNumeric(to!string(real.nan)) == true);
- assert(isNumeric(to!string(-real.infinity)) == true);
- assert(isNumeric(to!string(123e+2+1234.78Li)) == true);
- }
- string s = "$250.99-";
- assert(isNumeric(s[1..s.length - 2]) == true); // "250.9"
- assert(isNumeric(s) == false);
- assert(isNumeric(s[0..s.length - 1]) == false); // "$250.99"
- });
- assert(!isNumeric("-"));
- assert(!isNumeric("+"));
- }
- /*****************************
-  * Soundex algorithm.
-  *
-  * The Soundex algorithm converts a word into 4 characters
-  * based on how the word sounds phonetically. The idea is that
-  * two spellings that sound alike will have the same Soundex
-  * value, which means that Soundex can be used for fuzzy matching
-  * of names.
-  *
-  * Params:
-  *  string = String to convert to Soundex representation.
-  *  buffer = Optional array of at least 4 chars to put the resulting
-  *           Soundex characters into; only its first 4 chars are written.
-  *           If null, the return value buffer will be allocated on the heap.
-  * Returns:
-  *  The four character array with the Soundex result in it. When
-  *  $(D buffer) is supplied, this is the slice $(D buffer[0 .. 4]).
-  *  Returns null if there is no Soundex representation for the string.
-  *
-  * See_Also:
-  *  $(LINK2 http://en.wikipedia.org/wiki/Soundex, Wikipedia),
-  *  $(LUCKY The Soundex Indexing System)
-  *
-  * Bugs:
-  *  Only works well with English names.
-  *  There are other arguably better Soundex algorithms,
-  *  but this one is the standard one.
-  */
- char[] soundex(const(char)[] string, char[] buffer = null) @safe pure nothrow
- in
- {
-     assert(!buffer.ptr || buffer.length >= 4);
- }
- out (result)
- {
-     if (result.ptr)
-     {
-         assert(result.length == 4);
-         assert(result[0] >= 'A' && result[0] <= 'Z');
-         foreach (char c; result[1 .. 4])
-             assert(c >= '0' && c <= '6');
-     }
- }
- body
- {
-     // Soundex digit for each letter; '0' marks letters that are not coded.
-     static immutable dex =
-         // ABCDEFGHIJKLMNOPQRSTUVWXYZ
-         "01230120022455012623010202";
-
-     size_t filled = 0;  // number of result characters produced so far
-     char lastc;         // code of the previous letter; char.init means "none"
-
-     foreach (char cs; string)
-     {
-         auto c = cs;
-
-         if (c >= 'a' && c <= 'z')
-             c -= 'a' - 'A';             // fold to upper case
-         else if (c < 'A' || c > 'Z')
-         {
-             // Not a letter: skipped, and it separates equal codes.
-             lastc = lastc.init;
-             continue;
-         }
-
-         if (filled == 0)
-         {
-             // The first letter is stored as-is.
-             if (!buffer.ptr)
-                 buffer = new char[4];
-             buffer[0] = c;
-             filled = 1;
-             lastc = dex[c - 'A'];
-         }
-         else
-         {
-             // H and W are ignored and do not separate equal codes.
-             if (c == 'H' || c == 'W')
-                 continue;
-             // Vowels are not coded, but they do separate equal codes.
-             if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U')
-                 lastc = lastc.init;
-
-             c = dex[c - 'A'];
-             if (c != '0' && c != lastc)
-             {
-                 buffer[filled] = c;
-                 ++filled;
-                 lastc = c;
-             }
-         }
-
-         // Slice to 4: the in-contract permits a longer caller buffer, while
-         // the out-contract requires a result of exactly 4 characters.
-         if (filled == 4)
-             return buffer[0 .. 4];
-     }
-
-     if (filled == 0)
-         return null;                    // no letter found at all
-
-     buffer[filled .. 4] = '0';          // pad short codes with zeros
-     return buffer[0 .. 4];
- }
- @safe pure nothrow unittest
- {
-     assertCTFEable!(
-     {
-         // Inputs without any letter yield null.
-         assert(soundex(null) == null);
-         assert(soundex("") == null);
-         assert(soundex("0123^&^^**&^") == null);
-         // Results written into a caller-supplied buffer.
-         char[4] buffer;
-         assert(soundex("Kant", buffer) == "K530");
-         assert(soundex("Lukasiewicz", buffer) == "L222");
-         // Name / expected code pairs, result allocated by soundex.
-         foreach (pair; [
-             ["Euler", "E460"], [" Ellery ", "E460"],
-             ["Gauss", "G200"], ["Ghosh", "G200"],
-             ["Hilbert", "H416"], ["Heilbronn", "H416"],
-             ["Knuth", "K530"],
-             ["Lloyd", "L300"], ["Ladd", "L300"],
-             ["Lissajous", "L222"],
-             ["Robert", "R163"], ["Rupert", "R163"],
-             ["Rubin", "R150"],
-             ["Washington", "W252"],
-             ["Lee", "L000"],
-             ["Gutierrez", "G362"],
-             ["Pfister", "P236"],
-             ["Jackson", "J250"],
-             ["Tymczak", "T522"],
-             ["Ashcraft", "A261"],
-             ["Woo", "W000"],
-             ["Pilgrim", "P426"],
-             ["Flingjingwaller", "F452"],
-             ["PEARSE", "P620"], ["PIERCE", "P620"], ["Price", "P620"],
-             ["CATHY", "C300"], ["KATHY", "K300"],
-             ["Jones", "J520"], ["johnsons", "J525"],
-             ["Hardin", "H635"], ["Martinez", "M635"]])
-         {
-             assert(soundex(pair[0]) == pair[1]);
-         }
-     });
- }
- /***************************************************
-  * Builds an associative array that maps every abbreviation
-  * (leading part) of the strings in values which identifies exactly
-  * one of them to that full string. Each full string also maps to itself.
-  *
-  * This is useful in cases where the user is expected to type
-  * in one of a known set of strings, and the program will helpfully
-  * autocomplete the string once sufficient characters have been
-  * entered that uniquely identify it.
-  * Example:
-  * ---
-  * import std.stdio;
-  * import std.string;
-  *
-  * void main()
-  * {
-  *    static string[] list = [ "food", "foxy" ];
-  *
-  *    auto abbrevs = std.string.abbrev(list);
-  *
-  *    foreach (key, value; abbrevs)
-  *    {
-  *       writefln("%s => %s", key, value);
-  *    }
-  * }
-  * ---
-  * produces the output:
-  * <pre>
-  * fox => foxy
-  * food => food
-  * foxy => foxy
-  * foo => food
-  * </pre>
-  */
- string[string] abbrev(string[] values) @safe pure
- {
-     string[string] table;
-     // Sort a private copy so the caller's array is left untouched.
-     values = values.dup.sort; // @@@BUG@@@ not CTFEable
-     immutable size_t total = values.length;
-     bool havePrev = false;  // false only while handling the first distinct value
-     string prev;            // previous distinct value in sorted order
-     string next;            // last value examined while skipping duplicates
-     size_t i = 0;
-     while (i < total)
-     {
-         string value = values[i];
-         // Step over duplicates of value; stop at the next distinct string.
-         size_t after = i + 1;
-         while (after < total)
-         {
-             next = values[after];
-             if (next != value)
-                 break;
-             ++after;
-         }
-         immutable bool haveNext = after != total;
-         // Try every proper prefix, one code point at a time.
-         for (size_t j = 0; j < value.length; j += std.utf.stride(value, j))
-         {
-             string v = value[0 .. j];
-             // In sorted order only the two neighbours need to be checked.
-             immutable bool clashNext =
-                 haveNext && j <= next.length && v == next[0 .. j];
-             immutable bool clashPrev =
-                 havePrev && j <= prev.length && v == prev[0 .. j];
-             if (!clashNext && !clashPrev)
-                 table[v] = value;
-         }
-         table[value] = value;
-         prev = value;
-         havePrev = true;
-         i = after;
-     }
-     return table;
- }
- unittest
- {
-     debug(string) printf("string.abbrev.unittest\n");
-     // @@@BUG@@@ Built-in arr.sort is not CTFEable, so this test is not
-     // wrapped in assertCTFEable.
-     string[] values = ["hello", "hello", "he"];
-     string[string] r = abbrev(values);
-     auto keys = r.keys.dup;
-     keys.sort;
-     // The duplicate "hello" is counted once and "h" is ambiguous.
-     assert(keys == ["he", "hel", "hell", "hello"]);
-     assert(r["he"] == "he");
-     assert(r["hel"] == "hello");
-     assert(r["hell"] == "hello");
-     assert(r["hello"] == "hello");
- }
- /******************************************
-  * Computes the column number reached after str, assuming str starts
-  * in the leftmost column, which is numbered 0.
-  *
-  * A tab advances to the next multiple of tabsize; '\r', '\n',
-  * paraSep and lineSep reset the column to 0; every other code point
-  * counts as one column.
-  */
- size_t column(S)(S str, size_t tabsize = 8) @safe pure if (isSomeString!S)
- {
-     size_t pos = 0;
-     foreach (dchar ch; str)
-     {
-         if (ch == '\t')
-             pos = (pos + tabsize) / tabsize * tabsize;  // next tab stop
-         else if (ch == '\r' || ch == '\n' || ch == paraSep || ch == lineSep)
-             pos = 0;                                    // new line
-         else
-             ++pos;
-     }
-     return pos;
- }
- unittest
- {
-     debug(string) printf("string.column.unittest\n");
-     assertCTFEable!(
-     {
-         // Empty input stays in column 0.
-         assert(string.init.column() == 0);
-         assert("".column() == 0);
-         // Tabs move to the next multiple of the default tab size (8).
-         assert("\t".column() == 8);
-         assert("abc\t".column() == 8);
-         assert("12345678\t".column() == 16);
-     });
- }
- /******************************************
-  * Wrap text into a paragraph.
-  *
-  * The input text string s is formed into a paragraph
-  * by breaking it up into a sequence of lines, delineated
-  * by \n, such that the number of columns is not exceeded
-  * on each line. Words are the runs of non-whitespace characters in s;
-  * they are joined by a single space or a line break. A word is never
-  * split, so a line can exceed columns when one word is longer than that.
-  * The last line is terminated with a \n.
-  *
-  * The width of a word is measured in code units of s.
-  * Params:
-  *  s = text string to be wrapped
-  *  columns = maximum number of _columns in the paragraph
-  *  firstindent = string used to _indent first line of the paragraph
-  *  indent = string to use to _indent following lines of the paragraph
-  *  tabsize = column spacing of tabs
-  * Returns:
-  *  The resulting paragraph.
-  */
- S wrap(S)(S s, size_t columns = 80, S firstindent = null,
-     S indent = null, size_t tabsize = 8) @safe pure if (isSomeString!S)
- {
-     typeof(s.dup) result;
-     bool inword;            // currently scanning inside a word
-     bool first = true;      // no word has been emitted yet
-     size_t wordstart;       // index in s where the current word began
-     // Column at which every continuation line starts.
-     immutable size_t indentcol = column(indent, tabsize);
-     // Grow once to the expected size, then shrink back to the indent.
-     result.length = firstindent.length + s.length;
-     result.length = firstindent.length;
-     result[] = firstindent[];
-     auto col = column(firstindent, tabsize);
-     foreach (size_t i, dchar c; s)
-     {
-         if (std.uni.isWhite(c))
-         {
-             if (inword)
-             {
-                 // A word just ended: decide how to separate it from
-                 // what is already on the line.
-                 if (first)
-                 {
-                     // The first word directly follows firstindent.
-                 }
-                 else if (col + 1 + (i - wordstart) > columns)
-                 {
-                     result ~= '\n';
-                     result ~= indent;
-                     col = indentcol;
-                 }
-                 else
-                 {
-                     result ~= ' ';
-                     col += 1;
-                 }
-                 result ~= s[wordstart .. i];
-                 col += i - wordstart;
-                 inword = false;
-                 first = false;
-             }
-         }
-         else if (!inword)
-         {
-             wordstart = i;
-             inword = true;
-         }
-     }
-     if (inword)
-     {
-         // Final word, not followed by whitespace. Use the same fit test
-         // as in the loop ('>' rather than '>='), so that a last word
-         // that exactly fills the line is not pushed to a new one.
-         if (col + 1 + (s.length - wordstart) > columns)
-         {
-             result ~= '\n';
-             result ~= indent;
-         }
-         else if (result.length != firstindent.length)
-             result ~= ' ';
-         result ~= s[wordstart .. s.length];
-     }
-     result ~= '\n';
-     return result;
- }
- unittest
- {
-     debug(string) printf("string.wrap.unittest\n");
-     assertCTFEable!(
-     {
-         // Default width: each input ends up on a single line.
-         foreach (pair; [[string.init, "\n"],
-                         [" a b df ", "a b df\n"],
-                         ["x", "x\n"],
-                         ["u u", "u u\n"]])
-         {
-             assert(wrap(pair[0]) == pair[1]);
-         }
-         // Width of 3 columns: lines are broken between words only.
-         foreach (pair; [[" a b df ", "a b\ndf\n"],
-                         [" a bc df ", "a\nbc\ndf\n"],
-                         [" abcd df ", "abcd\ndf\n"]])
-         {
-             assert(wrap(pair[0], 3) == pair[1]);
-         }
-     });
- }
- /******************************************
-  * Removes indentation from a multi-line string or an array of single-line strings.
-  *
-  * This uniformly outdents the text as much as possible.
-  * Whitespace-only lines are always converted to blank lines.
-  *
-  * A StringException will be thrown if inconsistent indentation prevents
-  * the input from being outdented.
-  *
-  * Works at compile-time.
-  *
-  * Example:
-  * ---
-  * writeln(q{
-  *     import std.stdio;
-  *     void main() {
-  *         writeln("Hello");
-  *     }
-  * }.outdent());
-  * ---
-  *
-  * Output:
-  * ---
-  *
-  * import std.stdio;
-  * void main() {
-  *     writeln("Hello");
-  * }
-  *
-  * ---
-  *
-  */
- S outdent(S)(S str) @safe pure if(isSomeString!S)
- {
-     // Keep the terminators so that joining the outdented lines gives
-     // back the original line endings.
-     auto lines = str.splitLines(KeepTerminator.yes);
-     return lines.outdent().join();
- }
- /// ditto
- S[] outdent(S)(S[] lines) @safe pure if(isSomeString!S)
- {
-     if (lines.empty)
-         return null;
-     // True when text holds nothing but whitespace; usable in CTFE too.
-     static bool isBlank(S text)
-     {
-         auto stripped = __ctfe? text.ctfe_strip() : text.strip();
-         return stripped.empty;
-     }
-     // The run of whitespace at the start of text.
-     static S leadingWhiteOf(S text)
-     {
-         auto rest = find!(not!(std.uni.isWhite))(text);
-         return text[ 0 .. $-rest.length ];
-     }
-     // Pass 1: blank out whitespace-only lines and find the shortest indent.
-     S minIndent;
-     foreach (idx, line; lines)
-     {
-         if (isBlank(line))
-         {
-             // Keep only the line terminator, if there is one.
-             lines[idx] = line[line.chomp().length..$];
-             continue;
-         }
-         auto indent = leadingWhiteOf(line);
-         // Comparing number of code units instead of code points is OK here
-         // because this function throws upon inconsistent indentation.
-         if (minIndent !is null && indent.length >= minIndent.length)
-             continue;
-         // An unindented line means there is nothing to remove.
-         if (indent.empty)
-             return lines;
-         minIndent = indent;
-     }
-     // Pass 2: strip the shortest indent from every non-blank line.
-     foreach (idx, line; lines)
-     {
-         if (isBlank(line))
-             continue;
-         if (!line.startsWith(minIndent))
-         {
-             if (__ctfe)
-                 assert(false, "outdent: Inconsistent indentation");
-             else
-                 throw new StringException("outdent: Inconsistent indentation");
-         }
-         lines[idx] = line[minIndent.length..$];
-     }
-     return lines;
- }
- // CTFE-friendly stand-in for strip: removes whitespace from both ends.
- // TODO: Remove this and use std.string.strip when retro() becomes ctfe-able.
- private S ctfe_strip(S)(S str) if(isSomeString!(Unqual!S))
- {
-     auto leftTrimmed = str.stripLeft();
-     return leftTrimmed.ctfe_stripRight();
- }
- // CTFE-friendly stand-in for stripRight: removes trailing whitespace.
- // TODO: Remove this and use std.string.strip when retro() becomes ctfe-able.
- private S ctfe_stripRight(S)(S str) if(isSomeString!(Unqual!S))
- {
-     // end trails the backwards scan by one code point, so it is the
-     // index just past the character currently being examined.
-     size_t end = str.length;
-     foreach_reverse (idx, dchar ch; str)
-     {
-         if (!std.uni.isWhite(ch))
-             return str[0..end];
-         end = idx;
-     }
-     // Empty or whitespace-only input.
-     return str[0..0];
- }
- unittest // Exercises ctfe_strip and outdent, at run time (assert) and at compile time (static assert).
- {
- debug(string) printf("string.outdent.unittest\n");
- template outdent_testStr(S) // NOTE(review): appears unused in this unittest; testStr3 below repeats the same literal.
- {
- enum S outdent_testStr =
- "
- \t\tX
- \t\U00010143X
- \t\t
- \t\t\tX
- \t ";
- }
- template outdent_expected(S) // NOTE(review): appears unused in this unittest; expected3 below repeats the same literal.
- {
- enum S outdent_expected =
- "
- \tX
- \U00010143X
- \t\tX
- ";
- }
- assertCTFEable!( // the delegate's run-time asserts are also evaluated in CTFE
- {
- static assert(ctfe_strip(" \tHi \r\n") == "Hi"); // ctfe_strip trims whitespace on both sides
- static assert(ctfe_strip(" \tHi©\u2028 \r\n") == "Hi©");
- static assert(ctfe_strip("Hi") == "Hi");
- static assert(ctfe_strip(" \t \r\n") == "");
- static assert(ctfe_strip("") == "");
- foreach (S; TypeTuple!(string, wstring, dstring)) // repeat the outdent checks for each string type
- {
- enum S blank = ""; // empty input is returned as empty
- assert(blank.outdent() == blank);
- static assert(blank.outdent() == blank);
- enum S testStr1 = " \n \t\n "; // whitespace-only lines: only the terminators are expected to remain
- enum S expected1 = "\n\n";
- assert(testStr1.outdent() == expected1);
- static assert(testStr1.outdent() == expected1);
- assert(testStr1[0..$-1].outdent() == expected1); // same input without its last character
- static assert(testStr1[0..$-1].outdent() == expected1);
- enum S testStr2 = "a\n \t\nb"; // first line has no indentation: expected back unchanged
- assert(testStr2.outdent() == testStr2);
- static assert(testStr2.outdent() == testStr2);
- enum S testStr3 =
- "
- \t\tX
- \t\U00010143X
- \t\t
- \t\t\tX
- \t ";
- enum S expected3 =
- "
- \tX
- \U00010143X
- \t\tX
- ";
- assert(testStr3.outdent() == expected3);
- static assert(testStr3.outdent() == expected3);
- enum testStr4 = " X\r X\n X\r\n X\u2028 X\u2029 X"; // indented lines separated by \r, \n, \r\n, \u2028 and \u2029
- enum expected4 = "X\rX\nX\r\nX\u2028X\u2029X";
- assert(testStr4.outdent() == expected4);
- static assert(testStr4.outdent() == expected4);
- enum testStr5 = testStr4[0..$-1]; // testStr4 with its last character removed
- enum expected5 = expected4[0..$-1];
- assert(testStr5.outdent() == expected5);
- static assert(testStr5.outdent() == expected5);
- enum testStr6 = " \r \n \r\n \u2028 \u2029"; // whitespace-only lines with each of the terminators above
- enum expected6 = "\r\n\r\n\u2028\u2029";
- assert(testStr6.outdent() == expected6);
- static assert(testStr6.outdent() == expected6);
- }
- });
- }
- // Empty entry point, compiled only when the `string` debug identifier is set.
- debug(string)
- {
-     void main() {}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement