Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Building blocks for the regular expressions used by the word-splitting helpers in this class.
// Negation marker; concatenated in front of class contents before they are formatted into REGEX_WORD_EXPECTED.
- private static final String REGEX_NOT = "^";
// String.format template: %s receives the contents of a character class; matches one or more such characters.
- private static final String REGEX_WORD_EXPECTED = "[%s]+";
// Character-class fragments (no brackets), meant to be placed inside REGEX_WORD_EXPECTED.
- private static final String REGEX_LOWERCASE = "a-z";
- private static final String REGEX_UPPERCASE = "A-Z";
- private static final String REGEX_NUMERIC = "0-9";
// Complete pattern (one or more whitespace characters); used directly for collapsing and splitting.
- private static final String REGEX_WHITESPACE = "\\s+";
// Complete pattern with its own brackets and quantifier, unlike the bare fragments above.
// NOTE(review): getEnWords nevertheless inserts it into REGEX_WORD_EXPECTED, producing a nested class.
- private static final String REGEX_ENGLISH = "[a-zA-Z]+";
// Character-class fragment: four code-point ranges that getZhWords keeps as Chinese characters.
// NOTE(review): these look like CJK radicals, Kanbun, CJK Extension A and the main CJK Unified
// Ideographs block -- confirm against the Unicode charts before extending.
- private static final String REGEX_CHINESE = "\u2E80-\u2FD5\u3190-\u319f\u3400-\u4DBF\u4E00-\u9FCC";
- public static int getWordCounts(String from) {
- int zhCount = getSplitedZhWords(from).length;
- int enCount = getSplitedEnWords(from).length;
- Timber.d("zhCount %d | enCount %d", zhCount, enCount);
- return zhCount + enCount;
- }
- public static String getZhWords(String from) {
- return from
- .replaceAll(String.format(REGEX_WORD_EXPECTED, REGEX_NOT + REGEX_CHINESE), "")
- .replaceAll("", " ")
- .replaceAll(REGEX_WHITESPACE, " ")
- .trim();
- }
- public static String getEnWords(String from) {
- return from
- .replaceAll(String.format(REGEX_WORD_EXPECTED, REGEX_NOT + REGEX_NUMERIC + REGEX_LOWERCASE + REGEX_UPPERCASE), " ")
- .replaceAll(String.format(REGEX_WORD_EXPECTED, REGEX_NOT + REGEX_ENGLISH), " ")
- .replaceAll(REGEX_WHITESPACE, " ")
- .trim();
- }
- public static String[] getSplitedZhWords(String from) {
- final String zhWords = getZhWords(from);
- Timber.d("zhWords \n%s\n", zhWords);
- return (TextUtils.isEmpty(zhWords)) ? new String[0] : zhWords.split(REGEX_WHITESPACE);
- }
- public static String[] getSplitedEnWords(String from) {
- final String enWords = getEnWords(from);
- Timber.d("enWords \n%s\n", enWords);
- return (TextUtils.isEmpty(enWords)) ? new String[0] : enWords.split(REGEX_WHITESPACE);
- }
- public static int getWordCounts(String find, String from) {
- return getWordCounts(false, find, from);
- }
- public static int getWordCounts(boolean ignoreCase, String find, String from) {
- if (ignoreCase) find = find.toLowerCase(Locale.ENGLISH);
- if (ignoreCase) from = from.toLowerCase(Locale.ENGLISH);
- final ArrayList<String> fromList = new ArrayList<>();
- fromList.addAll(Arrays.asList(getSplitedZhWords(from)));
- fromList.addAll(Arrays.asList(getSplitedEnWords(from)));
- return Collections.frequency(fromList, find);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement