/**
Unicode Character Types
*/
module std.uni;
import core.stdc.stdint;
import core.stdc.stdio;
import std.ctype; // this really needs pure functions that return bools
/**
 * Exception raised when a code point cannot be handled by the Unicode tables.
 *
 * The message is formatted as "<msg>: \uLLLL\uHHHH", where LLLL is the low
 * 16 bits of the offending code point and HHHH is the next 16 bits.
 */
class UCharException : Exception {
    /**
     * Params:
     *   msg = human-readable description of the failure
     *   c   = the code point that triggered the failure
     */
    this(string msg, dchar c) {
        char[255] buf = void;
        // D strings are not NUL-terminated, so pass an explicit precision and
        // pointer ("%.*s") instead of relying on a C string.
        int len = snprintf(buf.ptr, buf.length, "%.*s: \\u%04X\\u%04X",
            cast(int)msg.length, msg.ptr,
            cast(uint)(c & 0xFFFF), cast(uint)((c >> 16) & 0xFFFF));
        // snprintf returns the length that WOULD have been written; clamp it
        // so the slice below never runs past the buffer (or goes negative).
        if(len < 0)
            len = 0;
        else if(len >= cast(int)buf.length)
            len = cast(int)buf.length - 1;
        super(buf[0 .. len].idup);
    }
}
// Lu, Ll, Lt, Lm, Lo
/// True when `code` is a letter. Code points up to 0x7F are answered by
/// isalpha(); all others are looked up with getType() and accepted when the
/// category is Lu, Ll, Lt, Lm or Lo.
/*pure*/ bool isUniAlpha(dchar code) {
    if(code > 0x7F) {
        switch(getType(code)) {
            case CharType.UPPERCASE_LETTER,
                 CharType.LOWERCASE_LETTER,
                 CharType.TITLECASE_LETTER,
                 CharType.MODIFIER_LETTER,
                 CharType.OTHER_LETTER:
                return true;
            default:
                return false;
        }
    }
    return isalpha(code) != 0;
}
// Lu
/// True when `code` is an uppercase letter: isupper() for code points up to
/// 0x7F, otherwise a getType() result of UPPERCASE_LETTER.
/*pure*/ bool isUniUpper(dchar code) {
    return code <= 0x7F
        ? isupper(code) != 0
        : getType(code) == CharType.UPPERCASE_LETTER;
}
// Ll
/// True when `code` is a lowercase letter: islower() for code points up to
/// 0x7F, otherwise a getType() result of LOWERCASE_LETTER.
/*pure*/ bool isUniLower(dchar code) {
    return code <= 0x7F
        ? islower(code) != 0
        : getType(code) == CharType.LOWERCASE_LETTER;
}
// Lt
/// True when getType() classifies `code` as TITLECASE_LETTER.
/*pure*/ bool isUniTitle(dchar code) {
    immutable kind = getType(code);
    return kind == CharType.TITLECASE_LETTER;
}
// Lu, Ll, Lt, Lm, Lo, Nd
/// True when `code` is a letter or a decimal digit. Code points up to 0x7F
/// are answered by isalnum(); the rest are accepted when getType() yields one
/// of the letter categories or DECIMAL_DIGIT_NUMBER.
/*pure*/ bool isUniAlphaNum(dchar code) {
    if(code > 0x7F) {
        switch(getType(code)) {
            case CharType.UPPERCASE_LETTER,
                 CharType.LOWERCASE_LETTER,
                 CharType.TITLECASE_LETTER,
                 CharType.MODIFIER_LETTER,
                 CharType.OTHER_LETTER,
                 CharType.DECIMAL_DIGIT_NUMBER:
                return true;
            default:
                return false;
        }
    }
    return isalnum(code) != 0;
}
// Nd, Nl, No
/// True when `code` is numeric. Code points up to 0x7F are answered by
/// isdigit(); the rest are accepted when getType() yields
/// DECIMAL_DIGIT_NUMBER, LETTER_NUMBER or OTHER_NUMBER.
/*pure*/ bool isUniNumber(dchar code) {
    if(code <= 0x7F) return isdigit(code) != 0;
    immutable kind = getType(code);
    return kind == CharType.DECIMAL_DIGIT_NUMBER
        || kind == CharType.LETTER_NUMBER
        || kind == CharType.OTHER_NUMBER;
}
// Nd
/// True when `code` is a decimal digit: isdigit() for code points up to 0x7F,
/// otherwise a getType() result of DECIMAL_DIGIT_NUMBER.
/*pure*/ bool isUniDigit(dchar code) {
    return code <= 0x7F
        ? isdigit(code) != 0
        : getType(code) == CharType.DECIMAL_DIGIT_NUMBER;
}
// Zs, Zl, Zp
/// True when `code` is a separator. Code points up to 0x7F are answered by
/// isspace(); the rest are accepted when getType() yields SPACE_SEPARATOR,
/// LINE_SEPARATOR or PARAGRAPH_SEPARATOR.
/*pure*/ bool isUniSeparator(dchar code) {
    if(code <= 0x7F) return isspace(code) != 0;
    immutable kind = getType(code);
    return kind == CharType.SPACE_SEPARATOR
        || kind == CharType.LINE_SEPARATOR
        || kind == CharType.PARAGRAPH_SEPARATOR;
}
// Zs
/// True when `code` is a space: isspace() for code points up to 0x7F,
/// otherwise a getType() result of SPACE_SEPARATOR.
/*pure*/ bool isUniSpace(dchar code) {
    return code <= 0x7F
        ? isspace(code) != 0
        : getType(code) == CharType.SPACE_SEPARATOR;
}
// Zl
/// True when getType() classifies `code` as LINE_SEPARATOR.
/*pure*/ bool isUniLine(dchar code) {
    immutable kind = getType(code);
    return kind == CharType.LINE_SEPARATOR;
}
// Zp
/// True when getType() classifies `code` as PARAGRAPH_SEPARATOR.
/*pure*/ bool isUniParagraph(dchar code) {
    immutable kind = getType(code);
    return kind == CharType.PARAGRAPH_SEPARATOR;
}
// Mn, Mc, Me
/// True when getType() classifies `code` as NONSPACING_MARK,
/// COMBINING_SPACING_MARK or ENCLOSING_MARK.
/*pure*/ bool isUniMark(dchar code) {
    immutable kind = getType(code);
    return kind == CharType.NONSPACING_MARK
        || kind == CharType.COMBINING_SPACING_MARK
        || kind == CharType.ENCLOSING_MARK;
}
/// True when getType() classifies `code` as NONSPACING_MARK.
/*pure*/ bool isUniNonspacing(dchar code) {
    immutable kind = getType(code);
    return kind == CharType.NONSPACING_MARK;
}
// Pc, Pd, Ps, Pe, Pi, Pf, Po
/// True when `code` is punctuation. Code points up to 0x7F are answered by
/// ispunct(); the rest are accepted when getType() yields one of the seven
/// punctuation categories (Pc, Pd, Ps, Pe, Pi, Pf, Po).
/*pure*/ bool isUniPunctuation(dchar code) {
    if(code > 0x7F) {
        switch(getType(code)) {
            case CharType.CONNECTOR_PUNCTUATION,
                 CharType.DASH_PUNCTUATION,
                 CharType.OPEN_PUNCTUATION,
                 CharType.CLOSE_PUNCTUATION,
                 CharType.INITIAL_QUOTE_PUNCTUATION,
                 CharType.FINAL_QUOTE_PUNCTUATION,
                 CharType.OTHER_PUNCTUATION:
                return true;
            default:
                return false;
        }
    }
    return ispunct(code) != 0;
}
// Sm, Sc, Sk, So
/// True when getType() classifies `code` as MATH_SYMBOL, CURRENCY_SYMBOL,
/// MODIFIER_SYMBOL or OTHER_SYMBOL.
/*pure*/ bool isUniSymbol(dchar code) {
    immutable kind = getType(code);
    return kind == CharType.MATH_SYMBOL
        || kind == CharType.CURRENCY_SYMBOL
        || kind == CharType.MODIFIER_SYMBOL
        || kind == CharType.OTHER_SYMBOL;
}
// Cc, Cf, Cs, Co, Cn
/// True when `code` belongs to one of the "other" categories: control,
/// format, surrogate, private use, or unassigned.
/*pure*/ bool isUniOther(dchar code) {
    switch(getType(code)) {
        // getType() reports unassigned or out-of-range code points as
        // UNASSIGNED (value 0), so that member must be accepted for Cn.
        // NOT_ASSIGNED (value 17) stays in the list for compatibility.
        case CharType.UNASSIGNED:
        case CharType.CONTROL:
        case CharType.FORMAT:
        case CharType.SURROGATE:
        case CharType.PRIVATE_USE:
        case CharType.NOT_ASSIGNED:
            return true;
        default:
            return false;
    }
}
// Cc
/// True when `code` is a control character: iscntrl() for code points up to
/// 0x7F, otherwise a getType() result of CONTROL.
/*pure*/ bool isUniControl(dchar code) {
    return code <= 0x7F
        ? iscntrl(code) != 0
        : getType(code) == CharType.CONTROL;
}
// Cf
/// True when getType() classifies `code` as FORMAT.
/*pure*/ bool isUniFormat(dchar code) {
    immutable kind = getType(code);
    return kind == CharType.FORMAT;
}
// Cs
/// True when getType() classifies `code` as SURROGATE.
/*pure*/ bool isUniSurrogate(dchar code) {
    immutable kind = getType(code);
    return kind == CharType.SURROGATE;
}
// Co
/// True when getType() classifies `code` as PRIVATE_USE.
/*pure*/ bool isUniPrivateUse(dchar code) {
    immutable kind = getType(code);
    return kind == CharType.PRIVATE_USE;
}
// Lu, Ll, Lt, Lm, Lo, Nd, Nl, No, Mn, Mc, Me,
// Pc, Pd, Ps, Pe, Pi, Pf, Po, Sm, Sc, Sk, So
/// True when `code` is a graphic character. Code points up to 0x7F are
/// answered by isgraph(); the rest are accepted when getType() yields a
/// letter, number, mark, punctuation or symbol category.
/*pure*/ bool isUniGraph(dchar code) {
    if(code > 0x7F) {
        switch(getType(code)) {
            // letters
            case CharType.UPPERCASE_LETTER,
                 CharType.LOWERCASE_LETTER,
                 CharType.TITLECASE_LETTER,
                 CharType.MODIFIER_LETTER,
                 CharType.OTHER_LETTER:
            // numbers
            case CharType.DECIMAL_DIGIT_NUMBER,
                 CharType.LETTER_NUMBER,
                 CharType.OTHER_NUMBER:
            // marks
            case CharType.NONSPACING_MARK,
                 CharType.COMBINING_SPACING_MARK,
                 CharType.ENCLOSING_MARK:
            // punctuation
            case CharType.CONNECTOR_PUNCTUATION,
                 CharType.DASH_PUNCTUATION,
                 CharType.OPEN_PUNCTUATION,
                 CharType.CLOSE_PUNCTUATION,
                 CharType.INITIAL_QUOTE_PUNCTUATION,
                 CharType.FINAL_QUOTE_PUNCTUATION,
                 CharType.OTHER_PUNCTUATION:
            // symbols
            case CharType.MATH_SYMBOL,
                 CharType.CURRENCY_SYMBOL,
                 CharType.MODIFIER_SYMBOL,
                 CharType.OTHER_SYMBOL:
                return true;
            default:
                return false;
        }
    }
    return isgraph(code) != 0;
}
// Lu, Ll, Lt, Lm, Lo, Nd, Nl, No, Zs, Mn, Mc, Me,
// Pc, Pd, Ps, Pe, Pi, Pf, Po, Sm, Sc, Sk, So
/// True when `code` is printable. Code points up to 0x7F are answered by
/// isprint(); the rest are accepted when getType() yields a letter, number,
/// space-separator, mark, punctuation or symbol category.
/*pure*/ bool isUniPrint(dchar code) {
    if(code > 0x7F) {
        switch(getType(code)) {
            // letters
            case CharType.UPPERCASE_LETTER,
                 CharType.LOWERCASE_LETTER,
                 CharType.TITLECASE_LETTER,
                 CharType.MODIFIER_LETTER,
                 CharType.OTHER_LETTER:
            // numbers
            case CharType.DECIMAL_DIGIT_NUMBER,
                 CharType.LETTER_NUMBER,
                 CharType.OTHER_NUMBER:
            // the only separator category that counts as printable
            case CharType.SPACE_SEPARATOR:
            // marks
            case CharType.NONSPACING_MARK,
                 CharType.COMBINING_SPACING_MARK,
                 CharType.ENCLOSING_MARK:
            // punctuation
            case CharType.CONNECTOR_PUNCTUATION,
                 CharType.DASH_PUNCTUATION,
                 CharType.OPEN_PUNCTUATION,
                 CharType.CLOSE_PUNCTUATION,
                 CharType.INITIAL_QUOTE_PUNCTUATION,
                 CharType.FINAL_QUOTE_PUNCTUATION,
                 CharType.OTHER_PUNCTUATION:
            // symbols
            case CharType.MATH_SYMBOL,
                 CharType.CURRENCY_SYMBOL,
                 CharType.MODIFIER_SYMBOL,
                 CharType.OTHER_SYMBOL:
                return true;
            default:
                return false;
        }
    }
    return isprint(code) != 0;
}
// Ws
/// True when getDirectionality() reports WHITESPACE for `code`.
/*pure*/ bool isUniDirWhiteSpace(dchar code) {
    immutable dir = getDirectionality(code);
    return dir == Direction.WHITESPACE;
}
// L
/// True when getDirectionality() reports LEFT_TO_RIGHT for `code`.
/*pure*/ bool isUniDirLTL(dchar code) {
    immutable dir = getDirectionality(code);
    return dir == Direction.LEFT_TO_RIGHT;
}
// R
/// True when getDirectionality() reports RIGHT_TO_LEFT for `code`.
/*pure*/ bool isUniDirRTL(dchar code) {
    immutable dir = getDirectionality(code);
    return dir == Direction.RIGHT_TO_LEFT;
}
// L, R
/// True when getDirectionality() reports LEFT_TO_RIGHT or RIGHT_TO_LEFT.
/*pure*/ bool isUniDirStrong(dchar code) {
    immutable dir = getDirectionality(code);
    return dir == Direction.LEFT_TO_RIGHT
        || dir == Direction.RIGHT_TO_LEFT;
}
// En, Es, Et, An, Cs
/// True when getDirectionality() reports one of the number-related classes:
/// European number, its separator or terminator, Arabic number, or common
/// number separator.
/*pure*/ bool isUniDirWeak(dchar code) {
    immutable dir = getDirectionality(code);
    return dir == Direction.EUROPEAN_NUMBER
        || dir == Direction.EUROPEAN_NUMBER_SEPARATOR
        || dir == Direction.EUROPEAN_NUMBER_TERMINATOR
        || dir == Direction.ARABIC_NUMBER
        || dir == Direction.COMMON_NUMBER_SEPARATOR;
}
//B, S, Ws, On
/// True when getDirectionality() reports BLOCK_SEPARATOR, SEGMENT_SEPARATOR,
/// WHITESPACE or OTHER_NEUTRALS.
/*pure*/ bool isUniDirNeutral(dchar code) {
    immutable dir = getDirectionality(code);
    return dir == Direction.BLOCK_SEPARATOR
        || dir == Direction.SEGMENT_SEPARATOR
        || dir == Direction.WHITESPACE
        || dir == Direction.OTHER_NEUTRALS;
}
// B, S
/// True when getDirectionality() reports BLOCK_SEPARATOR or SEGMENT_SEPARATOR.
/*pure*/ bool isUniDirSeparator(dchar code) {
    immutable dir = getDirectionality(code);
    return dir == Direction.BLOCK_SEPARATOR
        || dir == Direction.SEGMENT_SEPARATOR;
}
//
/// True when getDecompositionType() reports NOBREAK for `code`.
/*pure*/ bool isUniNonBreaking(dchar code) {
    immutable kind = getDecompositionType(code);
    return kind == DecompositionType.NOBREAK;
}
/// True when the "mirrored" bit is set in the packed data for `code`.
/*pure*/ bool isUniMirroring(dchar code) {
    auto flag = (getPackedData(code) >> MIRRORED_SHIFT) & MIRRORED_MASK;
    return flag != 0;
}
// Lu, Lt -> Ll
/// Maps `code` to lowercase by adding the LCDIFF entry selected by the
/// to-lower field of its packed data.
/*pure*/ dchar toUniLower(dchar code) {
    auto slot = (getPackedData(code) >> TOLOWER_SHIFT) & TOLOWER_MASK;
    return code + LCDIFF[slot];
}
// Ll, Lt -> Lu
/// Maps `code` to uppercase by adding the UCDIFF entry selected by the
/// to-upper field of its packed data.
/*pure*/ dchar toUniUpper(dchar code) {
    auto slot = (getPackedData(code) >> TOUPPER_SHIFT) & TOUPPER_MASK;
    return code + UCDIFF[slot];
}
// L& -> Lt
/// Maps `c` to titlecase using the TCDIFF entry selected by the to-title
/// field of its packed data.
/*pure*/ dchar toUniTitle(dchar c) {
    int32_t diff = TCDIFF[(getPackedData(c) >> TOTITLE_SHIFT) & TOTITLE_MASK];
    // A difference equal to TOTITLE_MASK acts as a sentinel meaning
    // "use the uppercase mapping instead".
    if(diff == TOTITLE_MASK) return toUniUpper(c);
    return c + diff;
}
/// Returns the mirror-image counterpart of `c`, or `c` itself when
/// isUniMirroring() is false for it.
/*pure*/ dchar toUniMirror(dchar c) {
    if(isUniMirroring(c)) {
        auto slot = (getPackedData(c) >> MIRROR_SHIFT) & MIRROR_MASK;
        return c + MIRROR_DIFF[slot];
    }
    return c;
}
// The following is taken from AndroidUnicode.h, AndroidUnicode.cpp and
// characterData.h from the Android project; after hunting high and low for
// documentation about Unicode on Google and not wanting to go for heavyweight
// implementations like ICU I found this gem. I ported it to D to implement
// the public Unicode api. Other implementations of interest are in V8 and
// Mozilla, some benchmarks would be useful to determine the one which performs
// best.
/*
* Copyright (C) 2008 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
private:
/*
* Directions specified in the Unicode standard. These directions map directly
* to java.lang.Character.
*/
// Bidirectional classes. Values are spelled out explicitly because they are
// stored in the packed data tables and cast straight back to this enum.
enum Direction {
    UNDEFINED                  = -1,
    LEFT_TO_RIGHT              = 0,
    RIGHT_TO_LEFT              = 1,
    RIGHT_TO_LEFT_ARABIC       = 2,
    EUROPEAN_NUMBER            = 3,
    EUROPEAN_NUMBER_SEPARATOR  = 4,
    EUROPEAN_NUMBER_TERMINATOR = 5,
    ARABIC_NUMBER              = 6,
    COMMON_NUMBER_SEPARATOR    = 7,
    NONSPACING_MARK            = 8,
    BOUNDARY_NEUTRAL           = 9,
    BLOCK_SEPARATOR            = 10,
    SEGMENT_SEPARATOR          = 11,
    WHITESPACE                 = 12,
    OTHER_NEUTRALS             = 13,
    LEFT_TO_RIGHT_EMBEDDING    = 14,
    LEFT_TO_RIGHT_OVERRIDE     = 15,
    RIGHT_TO_LEFT_EMBEDDING    = 16,
    RIGHT_TO_LEFT_OVERRIDE     = 17,
    POP_DIRECTIONAL_FORMAT     = 18
}
/*
* Character types as specified in the Unicode standard. These map directly to
* java.lang.Character.
*/
// General categories. Values are spelled out explicitly because the low five
// bits of every PACKED_DATA entry are cast straight back to this enum.
enum CharType {
    UNASSIGNED                = 0,
    UPPERCASE_LETTER          = 1,
    LOWERCASE_LETTER          = 2,
    TITLECASE_LETTER          = 3,
    MODIFIER_LETTER           = 4,
    OTHER_LETTER              = 5,
    NONSPACING_MARK           = 6,
    ENCLOSING_MARK            = 7,
    COMBINING_SPACING_MARK    = 8,
    DECIMAL_DIGIT_NUMBER      = 9,
    LETTER_NUMBER             = 10,
    OTHER_NUMBER              = 11,
    SPACE_SEPARATOR           = 12,
    LINE_SEPARATOR            = 13,
    PARAGRAPH_SEPARATOR       = 14,
    CONTROL                   = 15,
    FORMAT                    = 16,
    NOT_ASSIGNED              = 17,
    PRIVATE_USE               = 18,
    SURROGATE                 = 19,
    DASH_PUNCTUATION          = 20,
    OPEN_PUNCTUATION          = 21,
    CLOSE_PUNCTUATION         = 22,
    CONNECTOR_PUNCTUATION     = 23,
    OTHER_PUNCTUATION         = 24,
    MATH_SYMBOL               = 25,
    CURRENCY_SYMBOL           = 26,
    MODIFIER_SYMBOL           = 27,
    OTHER_SYMBOL              = 28,
    INITIAL_QUOTE_PUNCTUATION = 29,
    FINAL_QUOTE_PUNCTUATION   = 30
}
/*
* Decomposition types as described by the unicode standard. These values map to
* the same values in uchar.h in ICU.
*/
// Decomposition kinds. Values are spelled out explicitly because the top five
// bits of each findCharacterValue() result are cast straight back to this enum.
enum DecompositionType {
    NONE      = 0,
    CANONICAL = 1,
    COMPAT    = 2,
    CIRCLE    = 3,
    FINAL     = 4,
    FONT      = 5,
    FRACTION  = 6,
    INITIAL   = 7,
    ISOLATED  = 8,
    MEDIAL    = 9,
    NARROW    = 10,
    NOBREAK   = 11,
    SMALL     = 12,
    SQUARE    = 13,
    SUB       = 14,
    SUPER     = 15,
    VERTICAL  = 16,
    WIDE      = 17
}
// Radix limits plus the shift/mask pairs used to pull fields out of a
// PACKED_DATA entry (and, for DECOMPOSITION_*, out of a findCharacterValue()
// result). Each mask applies after shifting right by the matching shift.
enum {
    MIN_RADIX = 2,
    MAX_RADIX = 36,

    TYPE_SHIFT = 0,             // bits 0-4
    TYPE_MASK = 0x1F,
    DIRECTION_SHIFT = 5,        // bits 5-9
    DIRECTION_MASK = 0x1F,
    MIRRORED_SHIFT = 10,        // bit 10
    MIRRORED_MASK = 0x01,
    TOUPPER_SHIFT = 11,         // bits 11-16
    TOUPPER_MASK = 0x3F,
    TOLOWER_SHIFT = 17,         // bits 17-22
    TOLOWER_MASK = 0x3F,
    TOTITLE_SHIFT = 23,         // bits 23-24
    TOTITLE_MASK = 0x03,
    MIRROR_SHIFT = 25,          // bits 25-29, overlapping the numeric field
    MIRROR_MASK = 0x1F,
    NUMERIC_SHIFT = 25,         // bits 25-31
    NUMERIC_MASK = 0x7F,

    DECOMPOSITION_SHIFT = 11,   // top 5 bits of a 16-bit lookup value
    DECOMPOSITION_MASK = 0x1F
}
/**
* Returns the packed data for java calls
* @param c The unicode character.
* @return The packed data for the character.
*
* Copied from java.lang.Character implementation. Layout of the 32-bit packed
* value (bit 0 is the least significant bit):
*
* bits 0-4 (5 bits): 31 types
* bits 5-9 (5 bits): 18 directionalities
* bit 10 (1 bit): 2 mirroreds
* bits 11-16 (6 bits): 56 toupper diffs
* bits 17-22 (6 bits): 48 tolower diffs
* bits 23-24 (2 bits): 4 totitlecase diffs
* bits 25-31 (7 bits): 84 numeric values
* bits 25-29 (5 bits): 24 mirror char diffs (shares its bits with the
* numeric values)
*/
/*pure*/ dchar getPackedData(dchar c) {
    // The lookup value is 16 bits wide: its top 5 bits carry a decomposition
    // type, and the low 11 bits (mask 0x7FF) are the PACKED_DATA index.
    immutable ushort value = findCharacterValue(c);
    return PACKED_DATA[value & 0x7FF];
}
/*
* Get the Character type.
* @param c The unicode character.
* @return The character's type or CharType.UNASSIGNED if the character is
* invalid or has an unassigned class.
*/
/*pure*/ CharType getType(dchar c) {
    // Values at or beyond 0x10FFFF never reach the tables.
    if(c < 0x10FFFF)
        return cast(CharType)((getPackedData(c) >> TYPE_SHIFT) & TYPE_MASK);
    return CharType.UNASSIGNED;
}
/**
* Get the Character's decomposition type.
* @param c The unicode character.
* @return The character's decomposition type or DecompositionType.NONE if there
* is no decomposition.
*/
/*pure*/ DecompositionType getDecompositionType(dchar c) {
    // The lookup value is 16 bits wide; the decomposition type sits in its
    // top 5 bits and the remaining bits are a table index.
    immutable ushort value = findCharacterValue(c);
    return cast(DecompositionType)((value >> DECOMPOSITION_SHIFT) & DECOMPOSITION_MASK);
}
/// Returns the value of `c` as a digit in `radix`, or -1 when `radix` lies
/// outside MIN_RADIX..MAX_RADIX, when `c` is not one of 0-9, a-z or A-Z, or
/// when its value is not below `radix`.
/*pure*/ int getDigitValue(dchar c, int radix) {
    if(radix < MIN_RADIX || radix > MAX_RADIX) return -1;
    int digit;
    if(c >= '0' && c <= '9') {
        digit = c - '0';
    } else if(c >= 'a' && c <= 'z') {
        digit = c - 'a' + 10;
    } else if(c >= 'A' && c <= 'Z') {
        digit = c - 'A' + 10;
    } else {
        // Not an ASCII digit or letter.
        return -1;
    }
    if(digit < radix) return digit;
    return -1;
}
/// Returns the NUMERICS entry selected by the numeric field of the packed
/// data for `c`, or -1 when `c` is a mirroring character.
/*pure*/ int getNumericValue(dchar c) {
    // NUMERIC_SHIFT equals MIRROR_SHIFT, so for mirroring characters these
    // bits hold a mirror index rather than a numeric one.
    if(isUniMirroring(c)) return -1;
    auto slot = (getPackedData(c) >> NUMERIC_SHIFT) & NUMERIC_MASK;
    return NUMERICS[slot];
}
/// Returns the directionality stored in the packed data for `c`, or
/// Direction.UNDEFINED when the packed data is zero or the direction field
/// has all of its bits set.
/*pure*/ Direction getDirectionality(dchar c) {
    uint32_t data = getPackedData(c);
    if(data == 0) return Direction.UNDEFINED;
    Direction d = cast(Direction)((data >> DIRECTION_SHIFT) & DIRECTION_MASK);
    if(d == DIRECTION_MASK) return Direction.UNDEFINED;
    return d;
}
// Looks up the 16-bit table value for a code point: the top 5 bits are a
// decomposition type and the low 11 bits an index into PACKED_DATA.
// Throws UCharException when c is above 0x10FFFF or when the first entry
// examined after the search is greater than the search key.
// Returns 0 when the selected FULL_DATA range is empty.
/*pure*/ ushort findCharacterValue(dchar c) {
if(c > 0x10FFFF) throw new UCharException("invalid Unicode codepoint", c);
// Code points up to 0xFF are answered by a direct table lookup.
if(c <= 0xFF) return LATIN1_DATA[c];
// Rotate the bits because the tables are separated into even and odd
// codepoints
dchar u = (c >> 1) | ((c & 1) << 20);
// The bits above the low 16 of the rotated value select the range to search.
const Range search = FULL_DATA[u >> 16];
const uint[] array = search.array;
// This trick is so that the compare in the while loop does not need to
// shift the array entry down by 16
u <<= 16;
u |= 0xFFFF;
int high = search.length - 1;
int low = 0;
if(high < 0) return 0;
// Binary search that narrows [low, high] until the two are adjacent
// (or equal); the answer is then read from array[low].
while(low < high - 1) {
int probe = (high + low) >> 1;
// The entries contain the codepoint in the high 16 bits and the index
// into PACKED_DATA in the low 16.
if(array[probe] > u)
high = probe;
else
low = probe;
}
if(array[low] > u) throw new UCharException("a suitable range was not found", c);
return cast(ushort)(array[low] & 0xFFFF);
}
// Structure containing an array of ranges
struct Range {
    /// Number of entries to search in `array`.
    int length;
    /// Table entries: key in the high 16 bits, lookup value in the low 16.
    uint[] array;
}
// For Latin1 characters (code points 0x00-0xFF, one entry each, 256 in total)
// just index into this array to get the index and decomposition. Each entry
// uses the same 16-bit layout that findCharacterValue returns: top 5 bits
// decomposition type, low 11 bits index into PACKED_DATA.
immutable ushort[] LATIN1_DATA = [
0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001,
0x0001, 0x0002, 0x0003, 0x0002, 0x0004, 0x0003, 0x0001, 0x0001,
0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001,
0x0001, 0x0001, 0x0001, 0x0001, 0x0003, 0x0003, 0x0003, 0x0002,
0x0005, 0x0006, 0x0006, 0x0007, 0x0008, 0x0007, 0x0006, 0x0006,
0x0009, 0x000A, 0x0006, 0x000B, 0x000C, 0x000D, 0x000C, 0x000C,
0x000E, 0x000F, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015,
0x0016, 0x0017, 0x000C, 0x0006, 0x0018, 0x0019, 0x001A, 0x0006,
0x0006, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 0x0020, 0x0021,
0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029,
0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 0x0030, 0x0031,
0x0032, 0x0033, 0x0034, 0x0035, 0x0006, 0x0036, 0x0037, 0x0038,
0x0037, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
0x0050, 0x0051, 0x0052, 0x0035, 0x0019, 0x0036, 0x0019, 0x0001,
0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0003, 0x0001, 0x0001,
0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001,
0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001,
0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001,
0x5853, 0x0006, 0x0008, 0x0008, 0x0008, 0x0008, 0x0054, 0x0054,
0x1037, 0x0054, 0x7855, 0x0056, 0x0019, 0x0057, 0x0054, 0x1037,
0x0058, 0x0059, 0x785A, 0x785B, 0x1037, 0x105C, 0x0054, 0x0006,
0x1037, 0x785D, 0x7855, 0x005E, 0x305F, 0x305F, 0x305F, 0x0006,
0x0860, 0x0860, 0x0860, 0x0860, 0x0860, 0x0860, 0x0060, 0x0860,
0x0860, 0x0860, 0x0860, 0x0860, 0x0860, 0x0860, 0x0860, 0x0860,
0x0060, 0x0860, 0x0860, 0x0860, 0x0860, 0x0860, 0x0860, 0x0019,
0x0060, 0x0860, 0x0860, 0x0860, 0x0860, 0x0860, 0x0060, 0x0055,
0x0861, 0x0861, 0x0861, 0x0861, 0x0861, 0x0861, 0x0061, 0x0861,
0x0861, 0x0861, 0x0861, 0x0861, 0x0861, 0x0861, 0x0861, 0x0861,
0x0061, 0x0861, 0x0861, 0x0861, 0x0861, 0x0861, 0x0861, 0x0019,
0x0061, 0x0861, 0x0861, 0x0861, 0x0861, 0x0861, 0x0061, 0x0862
];
// Each of these arrays is stripped into ranges. In order to build the arrays, each
// codepoint was bit-shifted so that even and odd characters were separated into different
// arrays. The identifier of each array is the top byte after bit-shifting.
// The numbers stored in the array are the bit-shifted codepoint, the decomposition, and an
// index into another array of all possible packed data values. The top 16 bits are the
// codepoint and the bottom 16 are the decomposition and index. The top 5 bits for the decomposition
// and the rest for the index.
// The full set of all arrays to be searched. findCharacterValue picks an entry
// with the bits above the low 16 of the rotated code point; slots that have no
// table are empty ranges.
// Lengths are taken from `.length`, which is the element count for both static
// arrays and slices. (`.sizeof / uint.sizeof` is only an element count for
// static arrays, and `.length / uint.sizeof` never is.)
immutable Range[] FULL_DATA = [
    Range(cast(int)a0.length, a0),
    Range(cast(int)a1.length, a1),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(cast(int)a7.length, a7),
    Range(cast(int)a8.length, a8),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(cast(int)a16.length, a16),
    Range(cast(int)a17.length, a17),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(cast(int)a23.length, a23),
    Range(cast(int)a24.length, a24),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(0, null),
    Range(0, null)
];
// Array of uppercase differences: toUniUpper adds the entry selected by the
// to-upper field of the packed data to the code point. Entry 0 is 0, i.e.
// "no change". 56 entries.
immutable short[] UCDIFF = [
0, -32, 743, 121, -1, -232, -300, 97,
163, 130, 56, -2, -79, -210, -206, -205,
-202, -203, -207, -209, -211, -213, -214, -218,
-217, -219, -83, 84, -38, -37, -31, -64,
-63, -62, -57, -47, -54, -86, -80, 7,
-96, -48, -59, 8, 74, 86, 100, 128,
112, 126, 9, -7205, -16, -26, -7264, -40
];
// Array of lowercase differences: toUniLower adds the entry selected by the
// to-lower field of the packed data to the code point. Entry 0 is 0, i.e.
// "no change". 48 entries.
immutable short[] LCDIFF = [
0, 32, 1, -199, -121, 210, 206, 205,
79, 202, 203, 207, 211, 209, 213, 214,
218, 217, 219, 2, -97, -56, -130, -163,
83, 38, 37, 64, 63, -60, -7, 80,
48, 7264, -8, -74, -9, -86, -100, -112,
-128, -126, -7517, -8383, -8262, 16, 26, 40
];
// Array of titlecase differences, indexed by the 2-bit to-title field of the
// packed data. Entry 0 (value 3, equal to TOTITLE_MASK) is the sentinel that
// makes toUniTitle fall back to toUniUpper; the others are added to the code point.
immutable short[] TCDIFF = [
3, 1, 0, -1
];
// Array of mirrored character differences: toUniMirror adds the entry selected
// by the mirror field of the packed data to a mirroring code point.
immutable short[] MIRROR_DIFF = [
0, 1, -1, 2, -2, 16, -16, 3,
-3, 2016, 138, 1824, 2104, 2108, 2106, -138,
8, 7, -8, -7, -1824, -2016, -2104, -2106,
-2108
];
// Array of all possible numeric values, indexed by the numeric field of the
// packed data (see getNumericValue). 84 entries.
// NOTE(review): the -1 and -2 entries presumably follow the
// java.lang.Character.getNumericValue conventions (no value / not a
// non-negative integer) - confirm against the upstream tables.
immutable int[] NUMERICS = [
-1, 0, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, -2, 100, 1000,
40, 50, 60, 70, 80, 90, 10000, 500,
5000, 36, 37, 38, 39, 41, 42, 43,
44, 45, 46, 47, 48, 49, 200, 300,
400, 600, 700, 800, 900, 2000, 3000, 4000,
6000, 7000, 8000, 9000, 20000, 30000, 40000, 50000,
60000, 70000, 80000, 90000
];
// All possible packed data values, no duplicates. Indexed by the low 11 bits
// of the value returned by findCharacterValue (see getPackedData). Each entry
// packs the type, directionality, mirrored flag, case-mapping indices and the
// numeric/mirror index using the *_SHIFT / *_MASK constants above.
immutable uint[] PACKED_DATA = [
0x00000000, 0x0000012F, 0x0000016F, 0x0000014F, 0x0000018F, 0x0000018C, 0x000001B8, 0x000000B8,
0x000000BA, 0x020005B5, 0x040005B6, 0x00000099, 0x000000F8, 0x00000094, 0x02000069, 0x04000069,
0x06000069, 0x08000069, 0x0A000069, 0x0C000069, 0x0E000069, 0x10000069, 0x12000069, 0x14000069,
0x060005B9, 0x000001B9, 0x080005B9, 0x16020001, 0x18020001, 0x1A020001, 0x1C020001, 0x1E020001,
0x20020001, 0x22020001, 0x24020001, 0x26020001, 0x28020001, 0x2A020001, 0x2C020001, 0x2E020001,
0x30020001, 0x32020001, 0x34020001, 0x36020001, 0x38020001, 0x3A020001, 0x3C020001, 0x3E020001,
0x40020001, 0x42020001, 0x44020001, 0x46020001, 0x48020001, 0x060005B5, 0x080005B6, 0x000001BB,
0x000001B7, 0x16000802, 0x18000802, 0x1A000802, 0x1C000802, 0x1E000802, 0x20000802, 0x22000802,
0x24000802, 0x26000802, 0x28000802, 0x2A000802, 0x2C000802, 0x2E000802, 0x30000802, 0x32000802,
0x34000802, 0x36000802, 0x38000802, 0x3A000802, 0x3C000802, 0x3E000802, 0x40000802, 0x42000802,
0x44000802, 0x46000802, 0x48000802, 0x000000EC, 0x000001BC, 0x00000002, 0x0A0005BD, 0x00000130,
0x000000BC, 0x000000B9, 0x0600006B, 0x0800006B, 0x00001002, 0x0400006B, 0x0C0005BE, 0x4A0001AB,
0x00020001, 0x00000802, 0x00001802, 0x00040001, 0x00060001, 0x00002002, 0x00080001, 0x000C0001,
0x000E0001, 0x00100001, 0x00140001, 0x00160001, 0x00180001, 0x00004002, 0x00004802, 0x00200001,
0x00220001, 0x00000005, 0x00A60001, 0x01805802, 0x01042003, 0x00280001, 0x002C0001, 0x00000001,
0x00000000, 0x00007002, 0x00007802, 0x00009802, 0x0000A802, 0x0000B802, 0x0000C002, 0x0000C802,
0x0000D002, 0x00000004, 0x000001A4, 0x00000106, 0x00320001, 0x00340001, 0x00360001, 0x00380001,
0x0000E002, 0x0000E802, 0x0000F002, 0x0000F802, 0x00010002, 0x00010802, 0x00012002, 0x00012802,
0x00013802, 0x003A0001, 0x003E0001, 0x00013002, 0x0000001C, 0x00000107, 0x00400001, 0x00000018,
0x00014802, 0x000001B4, 0x00000038, 0x00000025, 0x00000050, 0x00000058, 0x00000045, 0x00000044,
0x020000C9, 0x060000C9, 0x0A0000C9, 0x0E0000C9, 0x120000C9, 0x000000D8, 0x0000005C, 0x00000008,
0x02000009, 0x06000009, 0x0A000009, 0x0E000009, 0x12000009, 0x0400000B, 0x0800000B, 0x0000000B,
0x1600000B, 0x4E00000B, 0x00000006, 0x4A00000B, 0x000001B5, 0x00420001, 0x0600000B, 0x0A00000B,
0x0E00000B, 0x1200000B, 0x3E00000B, 0x5200000B, 0x5600000B, 0x5A00000B, 0x5C00000B, 0x000001B6,
0x2400000A, 0x2800000A, 0x00000010, 0x020001AB, 0x060001AB, 0x0A0001AB, 0x0E0001AB, 0x120001AB,
0x00000108, 0x00015802, 0x00440001, 0x00016002, 0x00016802, 0x00017002, 0x00017802, 0x00018002,
0x00018802, 0x00440003, 0x00460001, 0x00480003, 0x00019802, 0x004A0001, 0x004C0001, 0x004E0001,
0x003C0001, 0x00500001, 0x00520001, 0x000001BD, 0x0000018D, 0x000001D0, 0x00000250, 0x00000230,
0x040005BE, 0x000000F9, 0x0200006B, 0x0A00006B, 0x0E00006B, 0x1200006B, 0x00540001, 0x00560001,
0x000005B9, 0x045A000A, 0x085A000A, 0x0C5A000A, 0x105A000A, 0x145A000A, 0x185A000A, 0x525A000A,
0x5E5A000A, 0x0401A00A, 0x0801A00A, 0x0C01A00A, 0x1001A00A, 0x1401A00A, 0x1801A00A, 0x5201A00A,
0x5E01A00A, 0x4E00000A, 0x5C00000A, 0x0E0005B9, 0x100005B9, 0x020005B9, 0x040005B9, 0x160005B9,
0x180005B9, 0x1A0005B9, 0x200005B9, 0x220005B9, 0x240005B9, 0x260005B9, 0x040001AB, 0x080001AB,
0x0C0001AB, 0x100001AB, 0x140001AB, 0x180001AB, 0x1C0001AB, 0x200001AB, 0x240001AB, 0x280001AB,
0x0C00006B, 0x1000006B, 0x1400006B, 0x1800006B, 0x1C00006B, 0x2000006B, 0x2400006B, 0x2800006B,
0x005C001C, 0x0001A81C, 0x1A0001AB, 0x1E0001AB, 0x220001AB, 0x260001AB, 0x2A0001AB, 0x160001AB,
0x020005B6, 0x100005B6, 0x280005B9, 0x2C0005B9, 0x300005B9, 0x0001B002, 0x020005BD, 0x0600000A,
0x0A00000A, 0x0E00000A, 0x1200000A, 0x1600000A, 0x3E00000A, 0x0C00000B, 0x1000000B, 0x1400000B,
0x2E0001AB, 0x320001AB, 0x360001AB, 0x3A0001AB, 0x3E0001AB, 0x420001AB, 0x460001AB, 0x640001AB,
0x680001AB, 0x6A0001AB, 0x6E0001AB, 0x720001AB, 0x760001AB, 0x7A0001AB, 0x00000013, 0x00000012,
0x0000005A, 0x000001B0, 0x7C00000B, 0x8000000B, 0x8200000B, 0x8600000B, 0x8C00000B, 0x6000000B,
0x9200000B, 0x9600000B, 0x9800000B, 0x9C00000B, 0xA000000B, 0xA400000B, 0x4A0001AA, 0x040001AA,
0x520001AA, 0x600001AA, 0x0C0001AA, 0x5E0001AA, 0x160001AA, 0x4C0001AA, 0x4E0001AA, 0x9E0001AA,
0x060001AA, 0x8800000A, 0x2A0001AA, 0x005E0001, 0x0001B802, 0x0400002B, 0x0800002B, 0x1600002B,
0x4C00002B, 0x00002802, 0x00003002, 0x000A0001, 0x00120001, 0x00003802, 0x001A0001, 0x001C0001,
0x001E0001, 0x00240001, 0x00005002, 0x00006002, 0x002A0001, 0x002E0001, 0x00300001, 0x00006802,
0x00008002, 0x00008802, 0x00009002, 0x0000A002, 0x0000B002, 0x0000D906, 0x00011002, 0x00011802,
0x00014002, 0x040000C9, 0x080000C9, 0x0C0000C9, 0x100000C9, 0x140000C9, 0x04000009, 0x08000009,
0x0C000009, 0x10000009, 0x14000009, 0x2200000B, 0x4C00000B, 0x2A00000B, 0x5000000B, 0x5400000B,
0x5800000B, 0x2600000A, 0x00015002, 0x00019002, 0x00000030, 0x000001BE, 0x0000014E, 0x00000210,
0x000001F0, 0x00580001, 0x065A000A, 0x0A5A000A, 0x0E5A000A, 0x125A000A, 0x165A000A, 0x1A5A000A,
0x4C5A000A, 0x4E5A000A, 0x0601A00A, 0x0A01A00A, 0x0E01A00A, 0x1201A00A, 0x1601A00A, 0x1A01A00A,
0x4C01A00A, 0x4E01A00A, 0x6000000A, 0x0000000A, 0x120005B9, 0x140005B9, 0x1C0005B9, 0x1E0005B9,
0x1600006B, 0x1A00006B, 0x1E00006B, 0x2200006B, 0x2600006B, 0x2A00006B, 0x0E0005B5, 0x040005B5,
0x2A0005B9, 0x2E0005B9, 0x0200000A, 0x0400000A, 0x0800000A, 0x0C00000A, 0x1000000A, 0x1400000A,
0x2A00000A, 0x2C0001AB, 0x300001AB, 0x340001AB, 0x380001AB, 0x3C0001AB, 0x400001AB, 0x440001AB,
0x480001AB, 0x620001AB, 0x660001AB, 0x500001AB, 0x6C0001AB, 0x700001AB, 0x740001AB, 0x780001AB,
0x520001AB, 0x7E00000B, 0x5E00000B, 0x8400000B, 0x8800000B, 0x8A00000B, 0x8E00000B, 0x9000000B,
0x9400000B, 0x9A00000B, 0x9E00000B, 0xA200000B, 0xA600000B, 0x5C0001AA, 0x3E0001AA, 0x7E0001AA,
0x0600002B, 0x0A00002B, 0x2A00002B, 0x4E00002B, 0x00000019
];