daily pastebin goal
17%
SHARE
TWEET

Regex .matchFirst benchmark

chadjoan Sep 9th, 2017 70 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //                           !!! LICENSE !!!
  3. // This example may be subject to the terms of the GPLv2, given that the
  4. // regular expression used was taken from the US Address Parser project:
  5. // https://usaddress.codeplex.com/
  6. //
  7. ///////////////////////////////////////////////////////////////////////////////
  8. //
  9. // Originally I tried to reproduce the problem by concocting a shoddy
  10. // datetime parser, in the hopes that it would be complex enough to demonstrate
  11. // the problem.  It wasn't.  So the address parser that originally found this
  12. // slowdown was used instead.  There are still peices of code that reference
  13. // the datetime parsing.  It doesn't seem to matter, because the slowdown still
  14. // happens.  Perhaps this slowdown is unaffected by whether the regex match
  15. // is successful or not.
  16. //
  17.  
  18. import std.algorithm.searching : count;
  19. import std.range.primitives;
  20. import std.regex;
  21. import std.stdio;
  22.  
  23. enum testSetSize = 100000;
  24. enum workBufferSize = 10000;
  25.  
  26. package Regex!char threadLocalRegex;
  27. package Regex!char threadLocalRegexNoCaptures;
  28.  
  29. //static StaticRegex!char ctComplexRegex     = ctRegex!(buildComplexRegexStr(), "sxi");
  30. //static StaticRegex!char ctCapturelessRegex = ctRegex!(buildCapturelessRegexStr(), "sxi");
  31.  
  32. static this()
  33. {
  34.     threadLocalRegex = buildComplexRegex();
  35.     threadLocalRegexNoCaptures = buildCapturelessRegex();
  36. }
  37.  
  38. auto noRegex(string toParse)
  39. {
  40.     // Very sloppy rough count of elements in a date string.
  41.     // It does not matter if it works or not.
  42.     // What does matter is that it does not allocate or use regular expressions,
  43.     // yet still does some kind of text analysis that regular expressions
  44.     // can be compared against.
  45.     import std.algorithm.iteration : filter, splitter;
  46.  
  47.     auto parts = toParse.splitter(' ').filter!`!a.empty`;
  48.  
  49.     auto parseDate(string toParse)
  50.     {
  51.         auto bits1 = toParse.splitter('-').filter!`!a.empty`;
  52.         if ( !bits1.empty )
  53.             return bits1.count;
  54.         auto bits2 = toParse.splitter('/').filter!`!a.empty`;
  55.         return bits2.count;
  56.     }
  57.  
  58.     auto parseTime(string toParse)
  59.     {
  60.         auto bits = toParse.splitter(':').filter!`!a.empty`;
  61.         return bits.count;
  62.     }
  63.  
  64.     if ( parts.empty )
  65.         return 0;
  66.  
  67.     auto nBits = parseDate(parts.front);
  68.     if ( nBits )
  69.     {
  70.         parts.popFront();
  71.         if ( parts.empty )
  72.             return nBits;
  73.         nBits += parseTime(parts.front);
  74.         return nBits;
  75.     }
  76.     else
  77.     {
  78.         nBits = parseTime(parts.front);
  79.         parts.popFront();
  80.         if ( parts.empty )
  81.             return nBits;
  82.         nBits += parseDate(parts.front);
  83.         return nBits;
  84.     }
  85. }
  86.  
  87. auto parseNoCaptures(string toParse)
  88. {
  89.     // We use the regex that does not use captures, though matchFirst will
  90.     // return at least one capture if the match succeeds: the capture of
  91.     // the entire match.
  92.     auto caps = toParse.matchFirst(threadLocalRegexNoCaptures);
  93.     auto cnt = caps.count;
  94.     assert(cnt <= 1);
  95.     return cnt;
  96. }
  97.  
  98. auto parseComplex(string toParse)
  99. {
  100.     auto caps = toParse.matchFirst(threadLocalRegex);
  101.     return caps.count;
  102. }
  103. /+
  104. auto parseNoCapturesCT(string toParse)
  105. {
  106.     // We use the regex that does not use captures, though matchFirst will
  107.     // return at least one capture if the match succeeds: the capture of
  108.     // the entire match.
  109.     auto caps = toParse.matchFirst(ctCapturelessRegex);
  110.     auto cnt = caps.count;
  111.     assert(cnt <= 1);
  112.     return cnt;
  113. }
  114.  
  115. auto parseComplexCT(string toParse)
  116. {
  117.     auto caps = toParse.matchFirst(ctComplexRegex);
  118.     return caps.count;
  119. }
  120. +/
  121.  
  122. struct Stats
  123. {
  124.     size_t total = 0;
  125.     size_t nCalls = 0;
  126.     size_t max = 0;
  127.  
  128.     @property size_t avg() { return total / nCalls; }
  129.  
  130.     void accumulate(size_t nCapturesOrElemss)
  131.     {
  132.         total += nCapturesOrElemss;
  133.         nCalls++;
  134.         if ( nCapturesOrElemss > max )
  135.             max = nCapturesOrElemss;
  136.     }
  137. }
  138.  
  139. auto benchAndPrint(alias bench)(string name, size_t n)
  140. {
  141.     import core.time : TickDuration;
  142.     import std.datetime : StopWatch;
  143.  
  144.     StopWatch sw;
  145.     TickDuration dt;
  146.  
  147.     stdout.write("Benchmarking ", name, " ");
  148.     sw.start();
  149.     auto stats = bench();
  150.     sw.stop();
  151.     dt = sw.peek();
  152.     stdout.writefln(" processed %s elements in %d.%0.3d seconds.", //" max == %d",
  153.         n, dt.seconds(), dt.msecs() % 1000, stats.max);
  154.     return stats;
  155. }
  156.  
  157. auto pbench(alias fn)(string name, string[] testData)
  158. {
  159.     import std.parallelism;
  160.     auto workUnitSize = workBufferSize/(taskPool.size*2);
  161.     auto bench() {
  162.         Stats stats;
  163.         foreach( result; taskPool.map!fn(testData, workBufferSize, workUnitSize) )
  164.             stats.accumulate(result);
  165.         return stats;
  166.     }
  167.     return benchAndPrint!bench(name, testData.length);
  168. }
  169.  
  170. auto sbench(alias fn)(string name, string[] testData)
  171. {
  172.     import std.algorithm.iteration : map;
  173.     auto bench() {
  174.         Stats stats;
  175.         foreach( result; testData.map!fn )
  176.             stats.accumulate(result);
  177.         return stats;
  178.     }
  179.     return benchAndPrint!bench(name, testData.length);
  180. }
  181.  
  182. void main()
  183. {
  184.     import std.array : array;
  185.     import std.range;
  186.  
  187.     string[] testData = [
  188.         "2017-08-09",
  189.         "1983-04-16",
  190.         "1234-32-32",
  191.         "2017-08-09 13:12:11",
  192.         "14:09:01.323",
  193.         "11/29/2001",
  194.         "13/32/9999",
  195.         "01/01/1001"
  196.     ];
  197.  
  198.     testData = testData.cycle.take(testSetSize).array;
  199.  
  200.     pbench!noRegex          ("non-regex parsing, parallel:               ", testData);
  201.     pbench!parseNoCaptures  ("regex parsing, no captures, parallel:      ", testData);
  202.     pbench!parseComplex     ("regex parsing, many captures, parallel:    ", testData);
  203.     //pbench!parseNoCapturesCT("ct regex parsing, no captures, parallel:   ", testData);
  204.     //pbench!parseComplexCT   ("ct regex parsing, many captures, parallel: ", testData);
  205.     sbench!noRegex          ("non-regex parsing, serial:                 ", testData);
  206.     sbench!parseNoCaptures  ("regex parsing, no captures, serial:        ", testData);
  207.     sbench!parseComplex     ("regex parsing, many captures, serial:      ", testData);
  208.     //sbench!parseNoCapturesCT("ct regex parsing, no captures, serial:     ", testData);
  209.     //sbench!parseComplexCT   ("ct regex parsing, many captures, serial:   ", testData);
  210. }
  211.  
  212.  
  213. /// Builds a gigantic regular expression that can be used to parse addresses.
  214. /// The result is intended to be fed into parseAddress(...).
  215. public auto buildAddressRegex()
  216. {
  217.     import std.algorithm.iteration : map, uniq;
  218.     import std.array : join;
  219.     import std.format;
  220.     import std.range : chain;
  221.     import std.regex;
  222.     import std.utf;
  223.  
  224.     enum string suffixPattern =
  225.         chain(suffixesArray.leftColumn, suffixesArray.rightColumn.uniq).join("|");
  226.  
  227.     enum string statePattern =
  228.         `\b(?:` ~
  229.         chain(
  230.             statesArray.leftColumn.map!(x => std.regex.escaper(x).toUTF8),
  231.             statesArray.rightColumn
  232.             ).join("|") ~
  233.         `)\b`;
  234.  
  235.     // This one can't be executed at compile-time due to calling the replaceAll
  236.     // method, which seems to call malloc and thus prevent itself from being
  237.     // CTFE-able.
  238.     string directionalPattern =
  239.         chain(
  240.             directionalsArray.leftColumn,
  241.             directionalsArray.rightColumn,
  242.             directionalsArray.rightColumn.map!(x => x.length == 1 ? x ~ `\.` : x)//std.regex.replaceAll(x, ctRegex!`(\w)`, `$1\.`))
  243.             ).join("|");
  244.  
  245.     enum zipPattern = `\d{5}(?:-?\d{4})?`;
  246.  
  247.     enum numberPattern =
  248.         `(?P<_numberPattern>
  249.            ((?P<number_00>\d+)(?P<secondaryNumber_00>(-[0-9])|(\-?[A-Z]))(?=\b))   (?# Unit-attached      )
  250.            |(?P<number_01>\d+[\-\x20]?\d+\/\d+)                                    (?# Fractional         )
  251.            |(?P<number_02>\d+\-?\d*)                                               (?# Normal Number      )
  252.            |(?P<number_03>[NSWE]\x20?\d+\x20?[NSWE]\x20?\d+)                       (?# Wisconsin/Illinois )
  253.        )`;
  254.  
  255.     // This can't be executed at compile-time due to using directionalPattern.
  256.     auto streetPattern =
  257.         format(
  258.             `
  259.                (?P<_streetPattern>
  260.                    (?P<_streetPattern_0>
  261.                        (?# special case for addresses like 100 South Street)
  262.                        (?P<street_00>%1$s)\W+
  263.                        (?P<suffix_00>%2$s)\b
  264.                    )
  265.                    |
  266.                    (?P<_streetPattern_1>
  267.                        (?:(?P<predirectional_00>%1$s)\W+)?
  268.                        (?P<_streetPattern_1_0>
  269.                            (?P<_streetPattern_1_0_0>
  270.                                (?P<street_01>[^,]*\d)
  271.                                (?:[^\w,]*(?P<postdirectional_01>%1$s)\b)
  272.                            )
  273.                            |
  274.                            (?P<_streetPattern_1_0_1>
  275.                                (?P<street_02>[^,]+)
  276.                                (?:[^\w,]+(?P<suffix_02>%2$s)\b)
  277.                                (?:[^\w,]+(?P<postdirectional_02>%1$s)\b)?
  278.                            )
  279.                            |
  280.                            (?P<_streetPattern_1_0_2>
  281.                                (?P<street_03>[^,]+?)
  282.                                (?:[^\w,]+(?P<suffix_03>%2$s)\b)?
  283.                                (?:[^\w,]+(?P<postdirectional_03>%1$s)\b)?
  284.                            )
  285.                        )
  286.                    )
  287.                )
  288.            `,
  289.             directionalPattern,
  290.             suffixPattern);
  291.  
  292.     enum rangedSecondaryUnitPattern =
  293.         `(?P<secondaryUnit_00>` ~
  294.         rangedSecondaryUnitsArray.leftColumn.join("|") ~
  295.         `)(?![a-z])`;
  296.  
  297.     enum rangelessSecondaryUnitPattern =
  298.         `(?P<secondaryUnit_01>` ~
  299.         rangelessSecondaryUnitsArray.leftColumn.join("|") ~
  300.         `)\b`;
  301.  
  302.     enum allSecondaryUnitPattern = format(
  303.         `
  304.            (?P<_allSecondaryUnitPattern>
  305.                (?:[:]?
  306.                    (?: (?:%1$s \W*)
  307.                        | (?P<secondaryUnit_02>\#)\W*
  308.                    )
  309.                    (?P<secondaryNumber_02>[\w-]+)
  310.                )
  311.                |%2$s
  312.            ),?
  313.        `,
  314.         rangedSecondaryUnitPattern,
  315.         rangelessSecondaryUnitPattern);
  316.  
  317.     enum cityAndStatePattern = format(
  318.         `
  319.            (?P<_cityAndStatePattern_%%1$s>
  320.                (?P<city_%%1$s>[^\d,]+?)\W+
  321.                (?P<state_%%1$s>%1$s)
  322.            )
  323.        `,
  324.         statePattern);
  325.  
  326.     enum placePattern = format(
  327.         `
  328.            (?:%1$s\W*)?
  329.            (?:(?P<zip_%%1$s>%2$s))?
  330.        `,
  331.         format(cityAndStatePattern,"%1$s"),
  332.         zipPattern);
  333.  
  334.     // This can't be executed at compile-time due to using streetPattern.
  335.     auto addressPattern = format(
  336.         `
  337.            ^
  338.            (?P<_addressPattern>
  339.                (?P<_addressPattern_0>
  340.                    (?# Special case for APO/FPO/DPO addresses)
  341.                    [^\w\#]*
  342.                    (?P<streetLine_00>.+?)
  343.                    (?P<city_00>[AFD]PO)\W+
  344.                    (?P<state_00>A[AEP])\W+
  345.                    (?P<zip_00>%6$s)
  346.                    \W*
  347.                )
  348.                |
  349.                (?P<_addressPattern_1>
  350.                    (?# Special case for PO boxes)
  351.                    \W*
  352.                    (?P<streetLine_01>(P[\.\x20]?O[\.\x20]?\x20)?BOX\x20[0-9]+)\W+
  353.                    %5$s
  354.                    \W*
  355.                )
  356.                |
  357.                (?P<_addressPattern_2>
  358.                    [^\w\#]*    (?# skip non-word chars except # {eg unit})
  359.                    (?:%1$s)\W*
  360.                       %2$s\W+
  361.                    (?:%3$s\W+)?
  362.                       %4$s
  363.                    \W*         (?# require on non-word chars at end)
  364.                )
  365.            )
  366.            $           (?# right up to end of string)
  367.        `,
  368.         numberPattern,
  369.         streetPattern,
  370.         allSecondaryUnitPattern,
  371.         format(placePattern,"02"),
  372.         format(placePattern,"01"),
  373.         zipPattern);
  374.  
  375.     return addressPattern;
  376. }
  377.  
  378. string buildComplexRegexStr()
  379. {
  380.     return buildAddressRegex();
  381. }
  382.  
  383. auto buildComplexRegex()
  384. {
  385.     return regex(buildComplexRegexStr(), "sxi");
  386. }
  387.  
  388. auto buildCapturelessRegexStr()
  389. {
  390.     //auto captureRemover = regex(`^(.*?)(P[<].*?[>])`, "si");
  391.     //auto regexStr = replaceAll(buildComplexRegexStr(), captureRemover, ":");
  392.     //return regexStr;
  393.     // Can't be used at compile-time because "malloc cannot be interpreted at compile time".
  394.  
  395.     import std.exception : assumeUnique;
  396.  
  397.     auto matchCapture(string toParse)
  398.     {
  399.         size_t i = 0;
  400.         if ( i+2 >= toParse.length || toParse[i..i+2] != "P<" )
  401.             return 0;
  402.  
  403.         i += 2;
  404.         if ( i+1 >= toParse.length )
  405.             return 0;
  406.  
  407.         while ( i < toParse.length && toParse[i] != '>' )
  408.             i++;
  409.  
  410.         if ( i >= toParse.length || toParse[i] != '>' )
  411.             return 0;
  412.  
  413.         i++;
  414.         return i;
  415.     }
  416.  
  417.     auto complexStr = buildComplexRegexStr();
  418.     char[] capturelessStr = new char[complexStr.length];
  419.     size_t j = 0;
  420.     while ( complexStr.length > 0 )
  421.     {
  422.         size_t i = matchCapture(complexStr);
  423.         if (i)
  424.         {
  425.             capturelessStr[j++] = ':';
  426.             complexStr = complexStr[i..$];
  427.         }
  428.         else
  429.         {
  430.             capturelessStr[j++] = complexStr[0];
  431.             complexStr = complexStr[1..$];
  432.         }
  433.     }
  434.  
  435.     return capturelessStr[0..j].assumeUnique;
  436. }
  437.  
  438. auto buildCapturelessRegex()
  439. {
  440.     enum regexStr = buildCapturelessRegexStr();
  441.     //pragma(msg, "Regex without captures:\n"~regexStr~"\n");
  442.     return regex(regexStr, "sxi");
  443. }
  444.  
  445. // ----------------------------------------------------------------------------
  446. // Address parser details
  447.  
  448. private
  449. {
  450.     /// Maps directional names (north, northeast, etc.) to abbreviations (N, NE, etc.).
  451.     immutable string[string] directionalsAA; /// ditto
  452.  
  453.     /// Maps lowercased US state and territory names to their canonical two-letter
  454.     /// postal abbreviations.
  455.     immutable string[string] statesAA; /// ditto
  456.  
  457.     /// Maps lowerecased USPS standard street suffixes to their canonical postal
  458.     /// abbreviations as found in TIGER/Line.
  459.     immutable string[string] suffixesAA; /// ditto
  460.  
  461.     /// Secondary units that require a number after them.
  462.     immutable string[string] rangedSecondaryUnitsAA; /// ditto
  463.  
  464.     /// Secondary units that do not require a number after them.
  465.     immutable string[string] rangelessSecondaryUnitsAA; /// ditto
  466. }
  467.  
  468. static this()
  469. {
  470.     import std.range;
  471.     import std.stdio;
  472.  
  473.     directionalsAA             = tableToAA(directionalsArray);
  474.     statesAA                   = tableToAA(statesArray);
  475.     suffixesAA                 = tableToAA(suffixesArray);
  476.     rangedSecondaryUnitsAA     = tableToAA(rangedSecondaryUnitsArray);
  477.     rangelessSecondaryUnitsAA  = tableToAA(rangelessSecondaryUnitsArray);
  478. }
  479.  
  480. pure private string[string] tableToAA(const string[2][] arr)
  481. {
  482.     string[string] result;
  483.     foreach ( pair; arr )
  484.         result[pair[0]] = pair[1];
  485.     return result;
  486. }
  487.  
  488. pure private auto leftColumn(string[][] table)
  489. {
  490.     import std.algorithm.iteration : map;
  491.     return table.map!`a[0]`;
  492. }
  493.  
  494. pure private auto rightColumn(string[][] table)
  495. {
  496.     import std.algorithm.iteration : map;
  497.     return table.map!`a[$-1]`;
  498. }
  499.  
  500. private:
  501.  
  502. /// Maps directional names (north, northeast, etc.) to abbreviations (N, NE, etc.).
  503. enum string[2][] directionalsArray = [
  504.     ["NORTH", "N"],
  505.     ["NORTHEAST", "NE"],
  506.     ["EAST", "E"],
  507.     ["SOUTHEAST", "SE"],
  508.     ["SOUTH", "S"],
  509.     ["SOUTHWEST", "SW"],
  510.     ["WEST", "W"],
  511.     ["NORTHWEST", "NW"]];
  512.  
  513. /// Secondary units that do not require a number after them.
  514. enum string[2][] rangelessSecondaryUnitsArray = [
  515.     ["BA?SE?ME?N?T", "BSMT"],
  516.     ["FRO?NT", "FRNT"],
  517.     ["LO?BBY", "LBBY"],
  518.     ["LOWE?R", "LOWR"],
  519.     ["OFF?I?CE?", "OFC"],
  520.     ["PE?N?T?HO?U?S?E?", "PH"],
  521.     ["REAR", "REAR"],
  522.     ["SIDE", "SIDE"],
  523.     ["UPPE?R", "UPPR"]];
  524.  
  525. /// Secondary units that require a number after them.
  526. enum string[2][] rangedSecondaryUnitsArray = [
  527.     ["SU?I?TE", "STE"],
  528.     ["(?:AP)(?:AR)?T(?:ME?NT)?", "APT"],
  529.     ["(?:DEP)(?:AR)?T(?:ME?NT)?", "DEPT"],
  530.     ["RO*M", "RM"],
  531.     ["FLO*R?", "FL"],
  532.     ["UNI?T", "UNIT"],
  533.     ["BU?I?LDI?N?G", "BLDG"],
  534.     ["HA?NGA?R", "HNGR"],
  535.     ["KEY", "KEY"],
  536.     ["LO?T", "LOT"],
  537.     ["PIER", "PIER"],
  538.     ["SLIP", "SLIP"],
  539.     ["SPA?CE?", "SPACE"],
  540.     ["STOP", "STOP"],
  541.     ["TRA?I?LE?R", "TRLR"],
  542.     ["BOX", "BOX"]];
  543.  
  544.  
  545. /// Maps lowercased US state and territory names to their canonical two-letter
  546. /// postal abbreviations.
  547. enum string[2][] statesArray = [
  548.     ["ALABAMA", "AL"],
  549.     ["ALASKA", "AK"],
  550.     ["AMERICAN SAMOA", "AS"],
  551.     ["ARIZONA", "AZ"],
  552.     ["ARKANSAS", "AR"],
  553.     ["CALIFORNIA", "CA"],
  554.     ["COLORADO", "CO"],
  555.     ["CONNECTICUT", "CT"],
  556.     ["DELAWARE", "DE"],
  557.     ["DISTRICT OF COLUMBIA", "DC"],
  558.     ["FEDERATED STATES OF MICRONESIA", "FM"],
  559.     ["FLORIDA", "FL"],
  560.     ["GEORGIA", "GA"],
  561.     ["GUAM", "GU"],
  562.     ["HAWAII", "HI"],
  563.     ["IDAHO", "ID"],
  564.     ["ILLINOIS", "IL"],
  565.     ["INDIANA", "IN"],
  566.     ["IOWA", "IA"],
  567.     ["KANSAS", "KS"],
  568.     ["KENTUCKY", "KY"],
  569.     ["LOUISIANA", "LA"],
  570.     ["MAINE", "ME"],
  571.     ["MARSHALL ISLANDS", "MH"],
  572.     ["MARYLAND", "MD"],
  573.     ["MASSACHUSETTS", "MA"],
  574.     ["MICHIGAN", "MI"],
  575.     ["MINNESOTA", "MN"],
  576.     ["MISSISSIPPI", "MS"],
  577.     ["MISSOURI", "MO"],
  578.     ["MONTANA", "MT"],
  579.     ["NEBRASKA", "NE"],
  580.     ["NEVADA", "NV"],
  581.     ["NEW HAMPSHIRE", "NH"],
  582.     ["NEW JERSEY", "NJ"],
  583.     ["NEW MEXICO", "NM"],
  584.     ["NEW YORK", "NY"],
  585.     ["NORTH CAROLINA", "NC"],
  586.     ["NORTH DAKOTA", "ND"],
  587.     ["NORTHERN MARIANA ISLANDS", "MP"],
  588.     ["OHIO", "OH"],
  589.     ["OKLAHOMA", "OK"],
  590.     ["OREGON", "OR"],
  591.     ["PALAU", "PW"],
  592.     ["PENNSYLVANIA", "PA"],
  593.     ["PUERTO RICO", "PR"],
  594.     ["RHODE ISLAND", "RI"],
  595.     ["SOUTH CAROLINA", "SC"],
  596.     ["SOUTH DAKOTA", "SD"],
  597.     ["TENNESSEE", "TN"],
  598.     ["TEXAS", "TX"],
  599.     ["UTAH", "UT"],
  600.     ["VERMONT", "VT"],
  601.     ["VIRGIN ISLANDS", "VI"],
  602.     ["VIRGINIA", "VA"],
  603.     ["WASHINGTON", "WA"],
  604.     ["WEST VIRGINIA", "WV"],
  605.     ["WISCONSIN", "WI"],
  606.     ["WYOMING", "WY"]];
  607.  
  608.  
  609. /// Maps lowerecased USPS standard street suffixes to their canonical postal
  610. /// abbreviations as found in TIGER/Line.
  611. enum string[2][] suffixesArray = [
  612.     ["ALLEE", "ALY"],
  613.     ["ALLEY", "ALY"],
  614.     ["ALLY", "ALY"],
  615.     ["ANEX", "ANX"],
  616.     ["ANNEX", "ANX"],
  617.     ["ANNX", "ANX"],
  618.     ["ARCADE", "ARC"],
  619.     ["AV", "AVE"],
  620.     ["AVEN", "AVE"],
  621.     ["AVENU", "AVE"],
  622.     ["AVENUE", "AVE"],
  623.     ["AVN", "AVE"],
  624.     ["AVNUE", "AVE"],
  625.     ["BAYOO", "BYU"],
  626.     ["BAYOU", "BYU"],
  627.     ["BEACH", "BCH"],
  628.     ["BEND", "BND"],
  629.     ["BLUF", "BLF"],
  630.     ["BLUFF", "BLF"],
  631.     ["BLUFFS", "BLFS"],
  632.     ["BOT", "BTM"],
  633.     ["BOTTM", "BTM"],
  634.     ["BOTTOM", "BTM"],
  635.     ["BOUL", "BLVD"],
  636.     ["BOULEVARD", "BLVD"],
  637.     ["BOULV", "BLVD"],
  638.     ["BRANCH", "BR"],
  639.     ["BRDGE", "BRG"],
  640.     ["BRIDGE", "BRG"],
  641.     ["BRNCH", "BR"],
  642.     ["BROOK", "BRK"],
  643.     ["BROOKS", "BRKS"],
  644.     ["BURG", "BG"],
  645.     ["BURGS", "BGS"],
  646.     ["BYPA", "BYP"],
  647.     ["BYPAS", "BYP"],
  648.     ["BYPASS", "BYP"],
  649.     ["BYPS", "BYP"],
  650.     ["CAMP", "CP"],
  651.     ["CANYN", "CYN"],
  652.     ["CANYON", "CYN"],
  653.     ["CAPE", "CPE"],
  654.     ["CAUSEWAY", "CSWY"],
  655.     ["CAUSWAY", "CSWY"],
  656.     ["CEN", "CTR"],
  657.     ["CENT", "CTR"],
  658.     ["CENTER", "CTR"],
  659.     ["CENTERS", "CTRS"],
  660.     ["CENTR", "CTR"],
  661.     ["CENTRE", "CTR"],
  662.     ["CIRC", "CIR"],
  663.     ["CIRCL", "CIR"],
  664.     ["CIRCLE", "CIR"],
  665.     ["CIRCLES", "CIRS"],
  666.     ["CK", "CRK"],
  667.     ["CLIFF", "CLF"],
  668.     ["CLIFFS", "CLFS"],
  669.     ["CLUB", "CLB"],
  670.     ["CMP", "CP"],
  671.     ["CNTER", "CTR"],
  672.     ["CNTR", "CTR"],
  673.     ["CNYN", "CYN"],
  674.     ["COMMON", "CMN"],
  675.     ["CORNER", "COR"],
  676.     ["CORNERS", "CORS"],
  677.     ["COURSE", "CRSE"],
  678.     ["COURT", "CT"],
  679.     ["COURTS", "CTS"],
  680.     ["COVE", "CV"],
  681.     ["COVES", "CVS"],
  682.     ["CR", "CRK"],
  683.     ["CRCL", "CIR"],
  684.     ["CRCLE", "CIR"],
  685.     ["CRECENT", "CRES"],
  686.     ["CREEK", "CRK"],
  687.     ["CRESCENT", "CRES"],
  688.     ["CRESENT", "CRES"],
  689.     ["CREST", "CRST"],
  690.     ["CROSSING", "XING"],
  691.     ["CROSSROAD", "XRD"],
  692.     ["CRSCNT", "CRES"],
  693.     ["CRSENT", "CRES"],
  694.     ["CRSNT", "CRES"],
  695.     ["CRSSING", "XING"],
  696.     ["CRSSNG", "XING"],
  697.     ["CRT", "CT"],
  698.     ["CURVE", "CURV"],
  699.     ["DALE", "DL"],
  700.     ["DAM", "DM"],
  701.     ["DIV", "DV"],
  702.     ["DIVIDE", "DV"],
  703.     ["DRIV", "DR"],
  704.     ["DRIVE", "DR"],
  705.     ["DRIVES", "DRS"],
  706.     ["DRV", "DR"],
  707.     ["DVD", "DV"],
  708.     ["ESTATE", "EST"],
  709.     ["ESTATES", "ESTS"],
  710.     ["EXP", "EXPY"],
  711.     ["EXPR", "EXPY"],
  712.     ["EXPRESS", "EXPY"],
  713.     ["EXPRESSWAY", "EXPY"],
  714.     ["EXPW", "EXPY"],
  715.     ["EXTENSION", "EXT"],
  716.     ["EXTENSIONS", "EXTS"],
  717.     ["EXTN", "EXT"],
  718.     ["EXTNSN", "EXT"],
  719.     ["FALLS", "FLS"],
  720.     ["FERRY", "FRY"],
  721.     ["FIELD", "FLD"],
  722.     ["FIELDS", "FLDS"],
  723.     ["FLAT", "FLT"],
  724.     ["FLATS", "FLTS"],
  725.     ["FORD", "FRD"],
  726.     ["FORDS", "FRDS"],
  727.     ["FOREST", "FRST"],
  728.     ["FORESTS", "FRST"],
  729.     ["FORG", "FRG"],
  730.     ["FORGE", "FRG"],
  731.     ["FORGES", "FRGS"],
  732.     ["FORK", "FRK"],
  733.     ["FORKS", "FRKS"],
  734.     ["FORT", "FT"],
  735.     ["FREEWAY", "FWY"],
  736.     ["FREEWY", "FWY"],
  737.     ["FRRY", "FRY"],
  738.     ["FRT", "FT"],
  739.     ["FRWAY", "FWY"],
  740.     ["FRWY", "FWY"],
  741.     ["GARDEN", "GDN"],
  742.     ["GARDENS", "GDNS"],
  743.     ["GARDN", "GDN"],
  744.     ["GATEWAY", "GTWY"],
  745.     ["GATEWY", "GTWY"],
  746.     ["GATWAY", "GTWY"],
  747.     ["GLEN", "GLN"],
  748.     ["GLENS", "GLNS"],
  749.     ["GRDEN", "GDN"],
  750.     ["GRDN", "GDN"],
  751.     ["GRDNS", "GDNS"],
  752.     ["GREEN", "GRN"],
  753.     ["GREENS", "GRNS"],
  754.     ["GROV", "GRV"],
  755.     ["GROVE", "GRV"],
  756.     ["GROVES", "GRVS"],
  757.     ["GTWAY", "GTWY"],
  758.     ["HARB", "HBR"],
  759.     ["HARBOR", "HBR"],
  760.     ["HARBORS", "HBRS"],
  761.     ["HARBR", "HBR"],
  762.     ["HAVEN", "HVN"],
  763.     ["HAVN", "HVN"],
  764.     ["HEIGHT", "HTS"],
  765.     ["HEIGHTS", "HTS"],
  766.     ["HGTS", "HTS"],
  767.     ["HIGHWAY", "HWY"],
  768.     ["HIGHWY", "HWY"],
  769.     ["HILL", "HL"],
  770.     ["HILLS", "HLS"],
  771.     ["HIWAY", "HWY"],
  772.     ["HIWY", "HWY"],
  773.     ["HLLW", "HOLW"],
  774.     ["HOLLOW", "HOLW"],
  775.     ["HOLLOWS", "HOLW"],
  776.     ["HOLWS", "HOLW"],
  777.     ["HRBOR", "HBR"],
  778.     ["HT", "HTS"],
  779.     ["HWAY", "HWY"],
  780.     ["INLET", "INLT"],
  781.     ["ISLAND", "IS"],
  782.     ["ISLANDS", "ISS"],
  783.     ["ISLES", "ISLE"],
  784.     ["ISLND", "IS"],
  785.     ["ISLNDS", "ISS"],
  786.     ["JCTION", "JCT"],
  787.     ["JCTN", "JCT"],
  788.     ["JCTNS", "JCTS"],
  789.     ["JUNCTION", "JCT"],
  790.     ["JUNCTIONS", "JCTS"],
  791.     ["JUNCTN", "JCT"],
  792.     ["JUNCTON", "JCT"],
  793.     ["KEY", "KY"],
  794.     ["KEYS", "KYS"],
  795.     ["KNOL", "KNL"],
  796.     ["KNOLL", "KNL"],
  797.     ["KNOLLS", "KNLS"],
  798.     ["LA", "LN"],
  799.     ["LAKE", "LK"],
  800.     ["LAKES", "LKS"],
  801.     ["LANDING", "LNDG"],
  802.     ["LANE", "LN"],
  803.     ["LANES", "LN"],
  804.     ["LDGE", "LDG"],
  805.     ["LIGHT", "LGT"],
  806.     ["LIGHTS", "LGTS"],
  807.     ["LNDNG", "LNDG"],
  808.     ["LOAF", "LF"],
  809.     ["LOCK", "LCK"],
  810.     ["LOCKS", "LCKS"],
  811.     ["LODG", "LDG"],
  812.     ["LODGE", "LDG"],
  813.     ["LOOPS", "LOOP"],
  814.     ["MANOR", "MNR"],
  815.     ["MANORS", "MNRS"],
  816.     ["MEADOW", "MDW"],
  817.     ["MEADOWS", "MDWS"],
  818.     ["MEDOWS", "MDWS"],
  819.     ["MILL", "ML"],
  820.     ["MILLS", "MLS"],
  821.     ["MISSION", "MSN"],
  822.     ["MISSN", "MSN"],
  823.     ["MNT", "MT"],
  824.     ["MNTAIN", "MTN"],
  825.     ["MNTN", "MTN"],
  826.     ["MNTNS", "MTNS"],
  827.     ["MOTORWAY", "MTWY"],
  828.     ["MOUNT", "MT"],
  829.     ["MOUNTAIN", "MTN"],
  830.     ["MOUNTAINS", "MTNS"],
  831.     ["MOUNTIN", "MTN"],
  832.     ["MSSN", "MSN"],
  833.     ["MTIN", "MTN"],
  834.     ["NECK", "NCK"],
  835.     ["ORCHARD", "ORCH"],
  836.     ["ORCHRD", "ORCH"],
  837.     ["OVERPASS", "OPAS"],
  838.     ["OVL", "OVAL"],
  839.     ["PARKS", "PARK"],
  840.     ["PARKWAY", "PKWY"],
  841.     ["PARKWAYS", "PKWY"],
  842.     ["PARKWY", "PKWY"],
  843.     ["PASSAGE", "PSGE"],
  844.     ["PATHS", "PATH"],
  845.     ["PIKES", "PIKE"],
  846.     ["PINE", "PNE"],
  847.     ["PINES", "PNES"],
  848.     ["PK", "PARK"],
  849.     ["PKWAY", "PKWY"],
  850.     ["PKWYS", "PKWY"],
  851.     ["PKY", "PKWY"],
  852.     ["PLACE", "PL"],
  853.     ["PLAIN", "PLN"],
  854.     ["PLAINES", "PLNS"],
  855.     ["PLAINS", "PLNS"],
  856.     ["PLAZA", "PLZ"],
  857.     ["PLZA", "PLZ"],
  858.     ["POINT", "PT"],
  859.     ["POINTS", "PTS"],
  860.     ["PORT", "PRT"],
  861.     ["PORTS", "PRTS"],
  862.     ["PRAIRIE", "PR"],
  863.     ["PRARIE", "PR"],
  864.     ["PRK", "PARK"],
  865.     ["PRR", "PR"],
  866.     ["RAD", "RADL"],
  867.     ["RADIAL", "RADL"],
  868.     ["RADIEL", "RADL"],
  869.     ["RANCH", "RNCH"],
  870.     ["RANCHES", "RNCH"],
  871.     ["RAPID", "RPD"],
  872.     ["RAPIDS", "RPDS"],
  873.     ["RDGE", "RDG"],
  874.     ["REST", "RST"],
  875.     ["RIDGE", "RDG"],
  876.     ["RIDGES", "RDGS"],
  877.     ["RIVER", "RIV"],
  878.     ["RIVR", "RIV"],
  879.     ["RNCHS", "RNCH"],
  880.     ["ROAD", "RD"],
  881.     ["ROADS", "RDS"],
  882.     ["ROUTE", "RTE"],
  883.     ["RVR", "RIV"],
  884.     ["SHOAL", "SHL"],
  885.     ["SHOALS", "SHLS"],
  886.     ["SHOAR", "SHR"],
  887.     ["SHOARS", "SHRS"],
  888.     ["SHORE", "SHR"],
  889.     ["SHORES", "SHRS"],
  890.     ["SKYWAY", "SKWY"],
  891.     ["SPNG", "SPG"],
  892.     ["SPNGS", "SPGS"],
  893.     ["SPRING", "SPG"],
  894.     ["SPRINGS", "SPGS"],
  895.     ["SPRNG", "SPG"],
  896.     ["SPRNGS", "SPGS"],
  897.     ["SPURS", "SPUR"],
  898.     ["SQR", "SQ"],
  899.     ["SQRE", "SQ"],
  900.     ["SQRS", "SQS"],
  901.     ["SQU", "SQ"],
  902.     ["SQUARE", "SQ"],
  903.     ["SQUARES", "SQS"],
  904.     ["STATION", "STA"],
  905.     ["STATN", "STA"],
  906.     ["STN", "STA"],
  907.     ["STR", "ST"],
  908.     ["STRAV", "STRA"],
  909.     ["STRAVE", "STRA"],
  910.     ["STRAVEN", "STRA"],
  911.     ["STRAVENUE", "STRA"],
  912.     ["STRAVN", "STRA"],
  913.     ["STREAM", "STRM"],
  914.     ["STREET", "ST"],
  915.     ["STREETS", "STS"],
  916.     ["STREME", "STRM"],
  917.     ["STRT", "ST"],
  918.     ["STRVN", "STRA"],
  919.     ["STRVNUE", "STRA"],
  920.     ["SUMIT", "SMT"],
  921.     ["SUMITT", "SMT"],
  922.     ["SUMMIT", "SMT"],
  923.     ["TERR", "TER"],
  924.     ["TERRACE", "TER"],
  925.     ["THROUGHWAY", "TRWY"],
  926.     ["TPK", "TPKE"],
  927.     ["TR", "TRL"],
  928.     ["TRACE", "TRCE"],
  929.     ["TRACES", "TRCE"],
  930.     ["TRACK", "TRAK"],
  931.     ["TRACKS", "TRAK"],
  932.     ["TRAFFICWAY", "TRFY"],
  933.     ["TRAIL", "TRL"],
  934.     ["TRAILS", "TRL"],
  935.     ["TRK", "TRAK"],
  936.     ["TRKS", "TRAK"],
  937.     ["TRLS", "TRL"],
  938.     ["TRNPK", "TPKE"],
  939.     ["TRPK", "TPKE"],
  940.     ["TUNEL", "TUNL"],
  941.     ["TUNLS", "TUNL"],
  942.     ["TUNNEL", "TUNL"],
  943.     ["TUNNELS", "TUNL"],
  944.     ["TUNNL", "TUNL"],
  945.     ["TURNPIKE", "TPKE"],
  946.     ["TURNPK", "TPKE"],
  947.     ["UNDERPASS", "UPAS"],
  948.     ["UNION", "UN"],
  949.     ["UNIONS", "UNS"],
  950.     ["VALLEY", "VLY"],
  951.     ["VALLEYS", "VLYS"],
  952.     ["VALLY", "VLY"],
  953.     ["VDCT", "VIA"],
  954.     ["VIADCT", "VIA"],
  955.     ["VIADUCT", "VIA"],
  956.     ["VIEW", "VW"],
  957.     ["VIEWS", "VWS"],
  958.     ["VILL", "VLG"],
  959.     ["VILLAG", "VLG"],
  960.     ["VILLAGE", "VLG"],
  961.     ["VILLAGES", "VLGS"],
  962.     ["VILLE", "VL"],
  963.     ["VILLG", "VLG"],
  964.     ["VILLIAGE", "VLG"],
  965.     ["VIST", "VIS"],
  966.     ["VISTA", "VIS"],
  967.     ["VLLY", "VLY"],
  968.     ["VST", "VIS"],
  969.     ["VSTA", "VIS"],
  970.     ["WALKS", "WALK"],
  971.     ["WELL", "WL"],
  972.     ["WELLS", "WLS"],
  973.     ["WY", "WAY"]];
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top