Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * The MIT License (MIT)
- *
- * Copyright © 2016 Emily Mabrey
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
- package org.emabrey.stackoverflow.java;
- import java.io.InputStream;
- import java.nio.charset.Charset;
- import java.nio.charset.StandardCharsets;
- import java.text.BreakIterator;
- import java.text.Normalizer;
- import java.util.ArrayList;
- import java.util.List;
- import java.util.Locale;
- import java.util.Scanner;
/**
 * Example class for use in answering <a href="http://stackoverflow.com/a/41309320/2446574">this StackOverflow question
 * on getting the length of a String</a>. This class requires a file named "unicodetest.txt" be placed alongside it in
 * the same package as a JAR resource; that file should contain the contents of the Quick Brown Unicode file available
 * <a href="http://www.cl.cam.ac.uk/~mgk25/ucs/examples/quickbrown.txt">here</a>.
 *
 * <p>For the raw input, its NFC-normalized form and its NFD-normalized form, the program prints three different
 * notions of "length": UTF-16 code units, Unicode code points, and character boundaries reported by
 * {@link BreakIterator#getCharacterInstance(Locale)}.
 *
 * @see <a href="http://stackoverflow.com/a/41309320/2446574">The StackOverflow answer this example belongs to</a>
 * @see <a href="http://www.cl.cam.ac.uk/~mgk25/ucs/examples/quickbrown.txt">The Quick Brown Unicode test file</a>
 *
 * @author Emily Mabrey &lt;[email protected]&gt;
 */
public class StringLengthExample {

    /** Name of the classpath resource, resolved relative to this class, that holds the sample text. */
    private static final String RESOURCE_NAME = "unicodetest.txt";

    /**
     * Loads the sample text and prints its lengths as-is, NFC-normalized and NFD-normalized.
     *
     * @param args ignored
     * @throws IllegalStateException if the {@code unicodetest.txt} resource cannot be found next to this class
     */
    public static void main(String[] args) {
        final InputStream resource = StringLengthExample.class.getResourceAsStream(RESOURCE_NAME);
        if (resource == null) {
            // getResourceAsStream signals "not found" with null; fail with a message that names the file
            // instead of letting Scanner throw a bare NullPointerException.
            throw new IllegalStateException(
                    "Resource \"" + RESOURCE_NAME + "\" was not found alongside " + StringLengthExample.class.getName());
        }

        final String inputQuickBrownString = convertStreamToString(resource, StandardCharsets.UTF_8);
        final String nfcNormalizedString = Normalizer.normalize(inputQuickBrownString, Normalizer.Form.NFC);
        final String nfdNormalizedString = Normalizer.normalize(inputQuickBrownString, Normalizer.Form.NFD);

        System.out.println("Input UTF-8 String");
        printAllLengths(inputQuickBrownString);
        System.out.println("NFC Normalized UTF-8 String");
        printAllLengths(nfcNormalizedString);
        System.out.println("NFD Normalized UTF-8 String");
        printAllLengths(nfdNormalizedString);
    }

    /**
     * Prints three length measurements of {@code string} to standard output: {@link String#length()},
     * {@link String#codePointCount(int, int)} over the whole string, and the number of character boundaries
     * found by an English-locale character {@link BreakIterator}.
     *
     * @param string the text to measure
     */
    private static void printAllLengths(String string) {
        final BreakIterator englishBreaks = BreakIterator.getCharacterInstance(Locale.ENGLISH);
        englishBreaks.setText(string);

        // Only the number of boundaries is reported, so count them directly rather than
        // collecting every boundary offset into a list.
        int graphemeCount = 0;
        while (englishBreaks.next() != BreakIterator.DONE) {
            graphemeCount++;
        }

        System.out.println(String.format(">>\tString.length() = %d", string.length()));
        System.out.println(String.format(">>\tString.codePointCount(int,int) = %d", string.codePointCount(0, string.length())));
        System.out.println(String.format(">>\tBreakIterator.getCharacterInstance(Locale) = %d", graphemeCount));
    }

    /**
     * Reads the whole of {@code is} into a single string, decoding it with charset {@code c}. The stream is
     * closed before this method returns.
     *
     * @param is the stream to drain; must not be {@code null}
     * @param c the charset used to decode the bytes
     * @return the decoded content, or the empty string if the stream yields no characters
     */
    private static String convertStreamToString(InputStream is, Charset c) {
        // Closing the Scanner also closes the wrapped stream, so the resource is released on every path.
        try (Scanner scanner = new Scanner(is, c.name())) {
            // "\\A" matches only at the beginning of input, so the entire content is one token.
            scanner.useDelimiter("\\A");
            // An empty stream has no token at all; return "" instead of letting next() throw.
            return scanner.hasNext() ? scanner.next() : "";
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement