Advertisement
emabrey

org.emabrey.stackoverflow.java.StringLengthExample class

Dec 23rd, 2016
472
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /*
  2.  * The MIT License (MIT)
  3.  *
  4.  * Copyright © 2016 Emily Mabrey
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included in
  14.  * all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22.  * DEALINGS IN THE SOFTWARE.
  23.  */
  24. package org.emabrey.stackoverflow.java;
  25.  
  26. import java.io.InputStream;
  27. import java.nio.charset.Charset;
  28. import java.nio.charset.StandardCharsets;
  29. import java.text.BreakIterator;
  30. import java.text.Normalizer;
  31. import java.util.ArrayList;
  32. import java.util.List;
  33. import java.util.Locale;
  34. import java.util.Scanner;
  35.  
  /**
   * Example class for use in answering
   * <a href="http://stackoverflow.com/a/41309320/2446574">this StackOverflow question on getting the length of a
   * String</a>. This class requires a file named "unicodetest.txt" be placed alongside it in the same package as a
   * JAR resource; that file should contain the contents of the Quick Brown Unicode file available
   * <a href="http://www.cl.cam.ac.uk/~mgk25/ucs/examples/quickbrown.txt">here</a>.
   *
   * <p>For the raw input, its NFC normalization and its NFD normalization, the program prints three different
   * notions of "length": UTF-16 code units, Unicode code points, and the number of character boundaries reported by
   * {@link BreakIterator#getCharacterInstance(Locale)}.
   *
   * @see <a href="http://stackoverflow.com/a/41309320/2446574">The StackOverflow answer</a>
   * @see <a href="http://www.cl.cam.ac.uk/~mgk25/ucs/examples/quickbrown.txt">Quick Brown Unicode test file</a>
   *
   * @author Emily Mabrey &lt;emilymabrey93@gmail.com&gt;
   */
  public class StringLengthExample {

      /** Name of the classpath resource, resolved relative to this class, that holds the sample text. */
      private static final String RESOURCE_NAME = "unicodetest.txt";

      /**
       * Loads the sample text and prints its lengths as-is, NFC-normalized and NFD-normalized.
       *
       * @param args ignored
       * @throws IllegalStateException if the "unicodetest.txt" resource cannot be found next to this class
       */
      public static void main(String[] args) {

          final InputStream resource = StringLengthExample.class.getResourceAsStream(RESOURCE_NAME);
          if (resource == null) {
              // Fail with an actionable message instead of an anonymous NullPointerException from Scanner.
              throw new IllegalStateException("Required classpath resource \"" + RESOURCE_NAME
                      + "\" was not found alongside " + StringLengthExample.class.getName());
          }

          final String inputQuickBrownString = convertStreamToString(resource, StandardCharsets.UTF_8);
          final String nfcNormalizedString = Normalizer.normalize(inputQuickBrownString, Normalizer.Form.NFC);
          final String nfdNormalizedString = Normalizer.normalize(inputQuickBrownString, Normalizer.Form.NFD);

          System.out.println("Input UTF-8 String");
          printAllLengths(inputQuickBrownString);

          System.out.println("NFC Normalized UTF-8 String");
          printAllLengths(nfcNormalizedString);

          System.out.println("NFD Normalized UTF-8 String");
          printAllLengths(nfdNormalizedString);

      }

      /**
       * Prints the length of the given string measured three ways: {@link String#length()},
       * {@link String#codePointCount(int, int)} over the whole string, and the number of boundaries an English
       * character-instance {@link BreakIterator} reports after the start of the text.
       *
       * @param string the text to measure; must not be {@code null}
       */
      private static void printAllLengths(String string) {

          BreakIterator englishBreaks = BreakIterator.getCharacterInstance(Locale.ENGLISH);
          englishBreaks.setText(string);

          // Only the number of boundaries is needed, so count them rather than storing each offset.
          int graphemeCount = 0;
          while (englishBreaks.next() != BreakIterator.DONE) {
              graphemeCount++;
          }

          System.out.printf(">>\tString.length() = %d%n", string.length());
          System.out.printf(">>\tString.codePointCount(int,int) = %d%n", string.codePointCount(0, string.length()));
          System.out.printf(">>\tBreakIterator.getCharacterInstance(Locale) = %d%n", graphemeCount);
      }

      /**
       * Reads the entire stream into a string using the given charset, then closes the stream.
       *
       * @param is the stream to drain; must not be {@code null}. It is closed before this method returns.
       * @param c the charset used to decode the stream's bytes
       * @return the decoded contents, or the empty string if the stream yields no input
       */
      private static String convertStreamToString(InputStream is, Charset c) {
          // Closing the Scanner also closes the underlying stream.
          try (Scanner scanner = new Scanner(is, c.name())) {
              // "\\A" matches only at the beginning of input, so the first token is the whole stream.
              scanner.useDelimiter("\\A");
              // An empty stream has no token; next() alone would throw NoSuchElementException.
              return scanner.hasNext() ? scanner.next() : "";
          }
      }

  }
Advertisement
RAW Paste Data Copied
Advertisement