Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
package com.jagex.runescape;

import java.util.Arrays;
import java.util.Locale;
- final class Censor {
- public static void load(Archive archive) {
- Stream fragmentsEnc = new Stream(
- archive.getFile("fragmentsenc.txt"));
- Stream badEnc = new Stream(archive.getFile("badenc.txt"));
- Stream domainEnc = new Stream(archive.getFile("domainenc.txt"));
- Stream topLevelDomainList = new Stream(
- archive.getFile("tldlist.txt"));
- loadDictionaries(fragmentsEnc, badEnc, domainEnc, topLevelDomainList);
- }
- private static void loadDictionaries(Stream fragment, Stream bad,
- Stream domain, Stream topLevelDomain) {
- loadBadEnc(bad);
- loadDomainEnc(domain);
- loadFragmentsEnc(fragment);
- loadTopLevelDomainList(topLevelDomain);
- }
- private static void loadTopLevelDomainList(Stream stream) {
- int length = stream.getInt();
- topLevelDomains = new char[length][];
- topLevelDomainType = new int[length];
- for (int index = 0; index < length; index++) {
- topLevelDomainType[index] = stream.getUnsignedByte();
- char topLevelDomain[] = new char[stream.getUnsignedByte()];
- for (int d = 0; d < topLevelDomain.length; d++)
- topLevelDomain[d] = (char) stream.getUnsignedByte();
- topLevelDomains[index] = topLevelDomain;
- }
- }
- private static void loadBadEnc(Stream stream) {
- int length = stream.getInt();
- badWords = new char[length][];
- badBytes = new byte[length][][];
- loadBadWords(stream, badWords, badBytes);
- }
- private static void loadDomainEnc(Stream stream) {
- int length = stream.getInt();
- domains = new char[length][];
- loadDomains(domains, stream);
- }
- private static void loadFragmentsEnc(Stream stream) {
- fragments = new int[stream.getInt()];
- for (int i = 0; i < fragments.length; i++)
- fragments[i] = stream.getUnsignedLEShort();
- }
- private static void loadBadWords(Stream stream, char badWords[][],
- byte badBytes[][][]) {
- for (int w = 0; w < badWords.length; w++) {
- char badWord[] = new char[stream.getUnsignedByte()];
- for (int c = 0; c < badWord.length; c++)
- badWord[c] = (char) stream.getUnsignedByte();
- badWords[w] = badWord;
- byte badByte[][] = new byte[stream.getUnsignedByte()][2];
- for (int b = 0; b < badByte.length; b++) {
- badByte[b][0] = (byte) stream.getUnsignedByte();
- badByte[b][1] = (byte) stream.getUnsignedByte();
- }
- if (badByte.length > 0)
- badBytes[w] = badByte;
- }
- }
- private static void loadDomains(char domains[][], Stream stream) {
- for (int d = 0; d < domains.length; d++) {
- char domain[] = new char[stream.getUnsignedByte()];
- for (int c = 0; c < domain.length; c++)
- domain[c] = (char) stream.getUnsignedByte();
- domains[d] = domain;
- }
- }
- private static void formatLegalCharacters(char characters[]) {
- int character = 0;
- for (int c = 0; c < characters.length; c++) {
- if (isLegalCharacter(characters[c]))
- characters[character] = characters[c];
- else
- characters[character] = ' ';
- if (character == 0 || characters[character] != ' '
- || characters[character - 1] != ' ')
- character++;
- }
- for (int c = character; c < characters.length; c++)
- characters[c] = ' ';
- }
- private static boolean isLegalCharacter(char c) {
- return c >= ' ' && c <= '\177' || c == ' ' || c == '\n' || c == '\t'
- || c == '\243' || c == '\u20AC';
- }
- public static String censorString(String string) {
- char stringLetters[] = string.toCharArray();
- formatLegalCharacters(stringLetters);
- String trimmedString = (new String(stringLetters)).trim();
- stringLetters = trimmedString.toLowerCase().toCharArray();
- String lowercaseTrimmedString = trimmedString.toLowerCase();
- removeTLDs(stringLetters);
- censorBadWords(stringLetters);
- removeEmails(stringLetters);
- censorLongNumbers(stringLetters);
- for (int e = 0; e < exceptions.length; e++) {
- for (int c = -1; (c = lowercaseTrimmedString.indexOf(exceptions[e],
- c + 1)) != -1;) {
- char exception[] = exceptions[e].toCharArray();
- System.arraycopy(exception, 0, stringLetters, c,
- exception.length);
- }
- }
- transferCapitals(trimmedString.toCharArray(), stringLetters);
- fixCases(stringLetters);
- return (new String(stringLetters)).trim();
- }
- private static void transferCapitals(char input[], char output[]) {
- for (int c = 0; c < input.length; c++)
- if (output[c] != '*' && isUpperCaseLetter(input[c]))
- output[c] = input[c];
- }
- private static void fixCases(char input[]) {
- boolean skipToNextUppercase = true;
- for (int pos = 0; pos < input.length; pos++) {
- char c = input[pos];
- if (isLetter(c)) {
- if (skipToNextUppercase) {
- if (isLowerCaseLetter(c))
- skipToNextUppercase = false;
- } else if (isUpperCaseLetter(c))
- input[pos] = (char) ((c + 97) - 65);
- } else {
- skipToNextUppercase = true;
- }
- }
- }
- private static void censorBadWords(char input[]) {
- for (int pass = 0; pass < 2; pass++) {
- for (int bad = badWords.length - 1; bad >= 0; bad--)
- censorPart(badBytes[bad], input, badWords[bad]);
- }
- }
- private static void removeEmails(char input[]) {
- char inputAtStripped[] = input.clone();
- char charactersAt[] = { '(', 'a', ')' };
- censorPart(null, inputAtStripped, charactersAt);
- char inputDotStripped[] = input.clone();
- char charactersDot[] = { 'd', 'o', 't' };
- censorPart(null, inputDotStripped, charactersDot);
- for (int d = domains.length - 1; d >= 0; d--)
- removeEmail(input, domains[d], inputDotStripped, inputAtStripped);
- }
- private static void removeEmail(char inputRaw[], char domain[],
- char inputDotStripped[], char inputAtStripped[]) {
- if (domain.length > inputRaw.length)
- return;
- int increment;
- for (int start = 0; start <= inputRaw.length - domain.length; start += increment) {
- int end = start;
- int charactersFound = 0;
- increment = 1;
- while (end < inputRaw.length) {
- int incrementEnd;
- char characterEnd = inputRaw[end];
- char characterNext = '\0';
- if (end + 1 < inputRaw.length)
- characterNext = inputRaw[end + 1];
- if (charactersFound < domain.length
- && (incrementEnd = basicLeetspeekCheck(characterEnd,
- domain[charactersFound], characterNext)) > 0) {
- end += incrementEnd;
- charactersFound++;
- continue;
- }
- if (charactersFound == 0)
- break;
- if ((incrementEnd = basicLeetspeekCheck(characterEnd,
- domain[charactersFound - 1], characterNext)) > 0) {
- end += incrementEnd;
- if (charactersFound == 1)
- increment++;
- continue;
- }
- if (charactersFound >= domain.length
- || !isNotAlphanumeric(characterEnd))
- break;
- end++;
- }
- if (charactersFound >= domain.length) {
- boolean censor = false;
- int atSignBeforeDomain = atSignBeforeDomain(inputRaw,
- inputAtStripped, start);
- int dotAfterDomain = dotAfterDomain(inputDotStripped, end - 1,
- inputRaw);
- if (atSignBeforeDomain > 2 || dotAfterDomain > 2)
- censor = true;
- if (censor) {
- for (int c = start; c < end; c++)
- inputRaw[c] = '*';
- }
- }
- }
- }
- private static int atSignBeforeDomain(char input[], char inputAtStripped[],
- int start) {
- if (start == 0)
- return 2;
- for (int c = start - 1; c >= 0; c--) {
- if (!isNotAlphanumeric(input[c]))
- break;
- if (input[c] == '@')
- return 3;
- }
- int censorCount = 0;
- for (int c = start - 1; c >= 0; c--) {
- if (!isNotAlphanumeric(inputAtStripped[c]))
- break;
- if (inputAtStripped[c] == '*')
- censorCount++;
- }
- if (censorCount >= 3)
- return 4;
- return !isNotAlphanumeric(input[start - 1]) ? 0 : 1;
- }
- private static int dotAfterDomain(char inputDotStripped[], int end,
- char input[]) {
- if (end + 1 == input.length)
- return 2;
- for (int c = end + 1; c < input.length; c++) {
- if (!isNotAlphanumeric(input[c]))
- break;
- if (input[c] == '.' || input[c] == ',')
- return 3;
- }
- int censorCount = 0;
- for (int c = end + 1; c < input.length; c++) {
- if (!isNotAlphanumeric(inputDotStripped[c]))
- break;
- if (inputDotStripped[c] == '*')
- censorCount++;
- }
- if (censorCount >= 3)
- return 4;
- return !isNotAlphanumeric(input[end + 1]) ? 0 : 1;
- }
- private static void removeTLDs(char input[]) {
- char dotStrippedInput[] = input.clone();
- char dotCharacters[] = { 'd', 'o', 't' };
- censorPart(null, dotStrippedInput, dotCharacters);
- char slashStrippedInput[] = input.clone();
- char slashCharacters[] = { 's', 'l', 'a', 's', 'h' };
- censorPart(null, slashStrippedInput, slashCharacters);
- for (int domain = 0; domain < topLevelDomains.length; domain++)
- removeTLD(slashStrippedInput, topLevelDomains[domain],
- topLevelDomainType[domain], dotStrippedInput, input);
- }
- private static void removeTLD(char inputSlashStripped[], char tld[],
- int tldType, char inputDotStripped[], char inputRaw[]) {
- if (tld.length > inputRaw.length)
- return;
- int increment;
- for (int pos = 0; pos <= inputRaw.length - tld.length; pos += increment) {
- int end = pos;
- int charactersFound = 0;
- increment = 1;
- while (end < inputRaw.length) {
- int incrementEnd;
- char characterEnd = inputRaw[end];
- char characterNext = '\0';
- if (end + 1 < inputRaw.length)
- characterNext = inputRaw[end + 1];
- if (charactersFound < tld.length
- && (incrementEnd = basicLeetspeekCheck(characterEnd,
- tld[charactersFound], characterNext)) > 0) {
- end += incrementEnd;
- charactersFound++;
- continue;
- }
- if (charactersFound == 0)
- break;
- if ((incrementEnd = basicLeetspeekCheck(characterEnd,
- tld[charactersFound - 1], characterNext)) > 0) {
- end += incrementEnd;
- if (charactersFound == 1)
- increment++;
- continue;
- }
- if (charactersFound >= tld.length
- || !isNotAlphanumeric(characterEnd))
- break;
- end++;
- }
- if (charactersFound >= tld.length) {
- boolean censor = false;
- int dotBeforeTLD = dotBeforeTLD(inputRaw, pos, inputDotStripped);
- int slashAfterTLD = slashAfterTLD(inputRaw, inputSlashStripped,
- end - 1);
- if (tldType == 1 && dotBeforeTLD > 0 && slashAfterTLD > 0)
- censor = true;
- if (tldType == 2
- && (dotBeforeTLD > 2 && slashAfterTLD > 0 || dotBeforeTLD > 0
- && slashAfterTLD > 2))
- censor = true;
- if (tldType == 3 && dotBeforeTLD > 0 && slashAfterTLD > 2)
- censor = true;
- // boolean _tmp = tldType == 3 && dotBeforeTLD > 2 &&
- // slashAfterTLD > 0; // unused
- if (censor) {
- int censorStart = pos;
- int censorEnd = end - 1;
- if (dotBeforeTLD > 2) {
- if (dotBeforeTLD == 4) {
- boolean breakUncensored = false;
- for (int c = censorStart - 1; c >= 0; c--)
- if (breakUncensored) {
- if (inputDotStripped[c] != '*')
- break;
- censorStart = c;
- } else if (inputDotStripped[c] == '*') {
- censorStart = c;
- breakUncensored = true;
- }
- }
- boolean breakNonAlphanumeric = false;
- for (int c = censorStart - 1; c >= 0; c--)
- if (breakNonAlphanumeric) {
- if (isNotAlphanumeric(inputRaw[c]))
- break;
- censorStart = c;
- } else if (!isNotAlphanumeric(inputRaw[c])) {
- breakNonAlphanumeric = true;
- censorStart = c;
- }
- }
- if (slashAfterTLD > 2) {
- if (slashAfterTLD == 4) {
- boolean breakUncensored = false;
- for (int c = censorEnd + 1; c < inputRaw.length; c++)
- if (breakUncensored) {
- if (inputSlashStripped[c] != '*')
- break;
- censorEnd = c;
- } else if (inputSlashStripped[c] == '*') {
- censorEnd = c;
- breakUncensored = true;
- }
- }
- boolean breakNonAlphanumeric = false;
- for (int c = censorEnd + 1; c < inputRaw.length; c++)
- if (breakNonAlphanumeric) {
- if (isNotAlphanumeric(inputRaw[c]))
- break;
- censorEnd = c;
- } else if (!isNotAlphanumeric(inputRaw[c])) {
- breakNonAlphanumeric = true;
- censorEnd = c;
- }
- }
- for (int c = censorStart; c <= censorEnd; c++)
- inputRaw[c] = '*';
- }
- }
- }
- }
- private static int dotBeforeTLD(char inputRaw[], int start,
- char inputStripped[]) {
- if (start == 0)
- return 2;
- for (int c = start - 1; c >= 0; c--) {
- if (!isNotAlphanumeric(inputRaw[c]))
- break;
- if (inputRaw[c] == ',' || inputRaw[c] == '.')
- return 3;
- }
- int asteriskCount = 0;
- for (int c = start - 1; c >= 0; c--) {
- if (!isNotAlphanumeric(inputStripped[c]))
- break;
- if (inputStripped[c] == '*')
- asteriskCount++;
- }
- if (asteriskCount >= 3)
- return 4;
- return !isNotAlphanumeric(inputRaw[start - 1]) ? 0 : 1;
- }
- private static int slashAfterTLD(char inputRaw[], char inputSlashRemoved[],
- int start) {
- if (start + 1 == inputRaw.length)
- return 2;
- for (int c = start + 1; c < inputRaw.length; c++) {
- if (!isNotAlphanumeric(inputRaw[c]))
- break;
- if (inputRaw[c] == '\\' || inputRaw[c] == '/')
- return 3;
- }
- int asteriskCount = 0;
- for (int c = start + 1; c < inputRaw.length; c++) {
- if (!isNotAlphanumeric(inputSlashRemoved[c]))
- break;
- if (inputSlashRemoved[c] == '*')
- asteriskCount++;
- }
- if (asteriskCount >= 5)
- return 4;
- return !isNotAlphanumeric(inputRaw[start + 1]) ? 0 : 1;
- }
- private static void censorPart(byte bytes[][], char input[], char search[]) {
- if (search.length > input.length)
- return;
- // boolean flag = true; // unused
- int moreDigitsThanLetters;
- for (int start = 0; start <= input.length - search.length; start += moreDigitsThanLetters) {
- int end = start;
- int charactersFound = 0;
- int counter = 0;
- moreDigitsThanLetters = 1;
- boolean notAlphanumericOrApostrophe = false;
- boolean digitUpcoming = false;
- boolean digitEnd = false;
- while (end < input.length && (!digitUpcoming || !digitEnd)) {
- int incrementEnd;
- char characterEnd = input[end];
- char characterNext = '\0';
- if (end + 1 < input.length)
- characterNext = input[end + 1];
- if (charactersFound < search.length
- && (incrementEnd = advancedLeetspeekCheck(
- characterNext, characterEnd,
- search[charactersFound])) > 0) {
- if (incrementEnd == 1 && isDigit(characterEnd))
- digitUpcoming = true;
- if (incrementEnd == 2
- && (isDigit(characterEnd) || isDigit(characterNext)))
- digitUpcoming = true;
- end += incrementEnd;
- charactersFound++;
- continue;
- }
- if (charactersFound == 0)
- break;
- if ((incrementEnd = advancedLeetspeekCheck(characterNext,
- characterEnd, search[charactersFound - 1])) > 0) {
- end += incrementEnd;
- if (charactersFound == 1)
- moreDigitsThanLetters++;
- continue;
- }
- if (charactersFound >= search.length
- || !isDigitOrSymbol(characterEnd))
- break;
- if (isNotAlphanumeric(characterEnd) && characterEnd != '\'')
- notAlphanumericOrApostrophe = true;
- if (isDigit(characterEnd))
- digitEnd = true;
- end++;
- if ((++counter * 100) / (end - start) > 90)
- break;
- }
- if (charactersFound >= search.length
- && (!digitUpcoming || !digitEnd)) {
- boolean censor = true;
- if (!notAlphanumericOrApostrophe) {
- char characterBeforeStart = ' ';
- if (start - 1 >= 0)
- characterBeforeStart = input[start - 1];
- char characterAtEnd = ' ';
- if (end < input.length)
- characterAtEnd = input[end];
- byte codeStart = getCharCode(characterBeforeStart);
- byte codeEnd = getCharCode(characterAtEnd);
- if (bytes != null
- && charCodesCensorable(codeStart, bytes, codeEnd))
- censor = false;
- } else {
- boolean invalidBeforeStart = false;
- boolean invalidAtEnd = false;
- if (start - 1 < 0 || isNotAlphanumeric(input[start - 1])
- && input[start - 1] != '\'')
- invalidBeforeStart = true;
- if (end >= input.length || isNotAlphanumeric(input[end])
- && input[end] != '\'')
- invalidAtEnd = true;
- if (!invalidBeforeStart || !invalidAtEnd) {
- boolean valid = false;
- int c = start - 2;
- if (invalidBeforeStart)
- c = start;
- for (; !valid && c < end; c++)
- if (c >= 0
- && (!isNotAlphanumeric(input[c]) || input[c] == '\'')) {
- char chars[] = new char[3];
- int _c;
- for (_c = 0; _c < 3; _c++) {
- if (c + _c >= input.length
- || isNotAlphanumeric(input[c + _c])
- && input[c + _c] != '\'')
- break;
- chars[_c] = input[c + _c];
- }
- boolean testsPassed = true;
- if (_c == 0)
- testsPassed = false;
- if (_c < 3
- && c - 1 >= 0
- && (!isNotAlphanumeric(input[c - 1]) || input[c - 1] == '\''))
- testsPassed = false;
- if (testsPassed
- && !charactersMatchFragment(chars))
- valid = true;
- }
- if (!valid)
- censor = false;
- }
- }
- if (censor) {
- int countDigits = 0;
- int countLetters = 0;
- int positionLastLetter = -1;
- for (int c = start; c < end; c++)
- if (isDigit(input[c]))
- countDigits++;
- else if (isLetter(input[c])) {
- countLetters++;
- positionLastLetter = c;
- }
- if (positionLastLetter > -1)
- countDigits -= end - 1 - positionLastLetter;
- if (countDigits <= countLetters) {
- for (int c = start; c < end; c++)
- input[c] = '*';
- } else {
- moreDigitsThanLetters = 1;
- }
- }
- }
- }
- }
- private static boolean charCodesCensorable(byte codeBeforeStart,
- byte bytes[][], byte codeAtEnd) {
- int pos = 0;
- if (bytes[pos][0] == codeBeforeStart && bytes[pos][1] == codeAtEnd)
- return true;
- int length = bytes.length - 1;
- if (bytes[length][0] == codeBeforeStart
- && bytes[length][1] == codeAtEnd)
- return true;
- do {
- int newPos = (pos + length) / 2;
- if (bytes[newPos][0] == codeBeforeStart
- && bytes[newPos][1] == codeAtEnd)
- return true;
- if (codeBeforeStart < bytes[newPos][0]
- || codeBeforeStart == bytes[newPos][0]
- && codeAtEnd < bytes[newPos][1])
- length = newPos;
- else
- pos = newPos;
- } while (pos != length && pos + 1 != length);
- return false;
- }
- private static int basicLeetspeekCheck(char firstChararacter, char find,
- char secondCharacter) {
- if (find == firstChararacter)
- return 1;
- if (find == 'o' && firstChararacter == '0')
- return 1;
- if (find == 'o' && firstChararacter == '(' && secondCharacter == ')')
- return 2;
- if (find == 'c'
- && (firstChararacter == '(' || firstChararacter == '<' || firstChararacter == '['))
- return 1;
- if (find == 'e' && firstChararacter == '\u20AC')
- return 1;
- if (find == 's' && firstChararacter == '$')
- return 1;
- return find != 'l' || firstChararacter != 'i' ? 0 : 1;
- }
- private static int advancedLeetspeekCheck(char secondCharacter,
- char firstCharacter, char find) {
- if (find == firstCharacter)
- return 1;
- if (find >= 'a' && find <= 'm') {
- if (find == 'a') {
- if (firstCharacter == '4' || firstCharacter == '@'
- || firstCharacter == '^')
- return 1;
- return firstCharacter != '/' || secondCharacter != '\\' ? 0 : 2;
- }
- if (find == 'b') {
- if (firstCharacter == '6' || firstCharacter == '8')
- return 1;
- return (firstCharacter != '1' || secondCharacter != '3')
- && (firstCharacter != 'i' || secondCharacter != '3') ? 0
- : 2;
- }
- if (find == 'c')
- return firstCharacter != '(' && firstCharacter != '<'
- && firstCharacter != '{' && firstCharacter != '[' ? 0
- : 1;
- if (find == 'd')
- return (firstCharacter != '[' || secondCharacter != ')')
- && (firstCharacter != 'i' || secondCharacter != ')') ? 0
- : 2;
- if (find == 'e')
- return firstCharacter != '3' && firstCharacter != '\u20AC' ? 0
- : 1;
- if (find == 'f') {
- if (firstCharacter == 'p' && secondCharacter == 'h')
- return 2;
- return firstCharacter != '\243' ? 0 : 1;
- }
- if (find == 'g')
- return firstCharacter != '9' && firstCharacter != '6'
- && firstCharacter != 'q' ? 0 : 1;
- if (find == 'h')
- return firstCharacter != '#' ? 0 : 1;
- if (find == 'i')
- return firstCharacter != 'y' && firstCharacter != 'l'
- && firstCharacter != 'j' && firstCharacter != '1'
- && firstCharacter != '!' && firstCharacter != ':'
- && firstCharacter != ';' && firstCharacter != '|' ? 0
- : 1;
- if (find == 'j')
- return 0;
- if (find == 'k')
- return 0;
- if (find == 'l')
- return firstCharacter != '1' && firstCharacter != '|'
- && firstCharacter != 'i' ? 0 : 1;
- if (find == 'm')
- return 0;
- }
- if (find >= 'n' && find <= 'z') {
- if (find == 'n')
- return 0;
- if (find == 'o') {
- if (firstCharacter == '0' || firstCharacter == '*')
- return 1;
- return (firstCharacter != '(' || secondCharacter != ')')
- && (firstCharacter != '[' || secondCharacter != ']')
- && (firstCharacter != '{' || secondCharacter != '}')
- && (firstCharacter != '<' || secondCharacter != '>') ? 0
- : 2;
- }
- if (find == 'p')
- return 0;
- if (find == 'q')
- return 0;
- if (find == 'r')
- return 0;
- if (find == 's')
- return firstCharacter != '5' && firstCharacter != 'z'
- && firstCharacter != '$' && firstCharacter != '2' ? 0
- : 1;
- if (find == 't')
- return firstCharacter != '7' && firstCharacter != '+' ? 0 : 1;
- if (find == 'u') {
- if (firstCharacter == 'v')
- return 1;
- return (firstCharacter != '\\' || secondCharacter != '/')
- && (firstCharacter != '\\' || secondCharacter != '|')
- && (firstCharacter != '|' || secondCharacter != '/') ? 0
- : 2;
- }
- if (find == 'v')
- return (firstCharacter != '\\' || secondCharacter != '/')
- && (firstCharacter != '\\' || secondCharacter != '|')
- && (firstCharacter != '|' || secondCharacter != '/') ? 0
- : 2;
- if (find == 'w')
- return firstCharacter != 'v' || secondCharacter != 'v' ? 0 : 2;
- if (find == 'x')
- return (firstCharacter != ')' || secondCharacter != '(')
- && (firstCharacter != '}' || secondCharacter != '{')
- && (firstCharacter != ']' || secondCharacter != '[')
- && (firstCharacter != '>' || secondCharacter != '<') ? 0
- : 2;
- if (find == 'y')
- return 0;
- if (find == 'z')
- return 0;
- }
- if (find >= '0' && find <= '9') {
- if (find == '0') {
- if (firstCharacter == 'o' || firstCharacter == 'O')
- return 1;
- return (firstCharacter != '(' || secondCharacter != ')')
- && (firstCharacter != '{' || secondCharacter != '}')
- && (firstCharacter != '[' || secondCharacter != ']') ? 0
- : 2;
- }
- if (find == '1')
- return firstCharacter != 'l' ? 0 : 1;
- else
- return 0;
- }
- if (find == ',')
- return firstCharacter != '.' ? 0 : 1;
- if (find == '.')
- return firstCharacter != ',' ? 0 : 1;
- if (find == '!')
- return firstCharacter != 'i' ? 0 : 1;
- else
- return 0;
- }
- private static byte getCharCode(char c) {
- if (c >= 'a' && c <= 'z')
- return (byte) ((c - 97) + 1);
- if (c == '\'')
- return 28;
- if (c >= '0' && c <= '9')
- return (byte) ((c - 48) + 29);
- else
- return 27;
- }
- private static void censorLongNumbers(char input[]) {
- int nextDigit;
- int nextNonDigit = 0;
- int invalidFound = 0;
- int start = 0;
- while ((nextDigit = getFirstDigitPosition(input, nextNonDigit)) != -1) {
- boolean charactersBeforeNextDigit = false;
- for (int c = nextNonDigit; c >= 0 && c < nextDigit
- && !charactersBeforeNextDigit; c++)
- if (!isNotAlphanumeric(input[c]) && !isDigitOrSymbol(input[c]))
- charactersBeforeNextDigit = true;
- if (charactersBeforeNextDigit)
- invalidFound = 0;
- if (invalidFound == 0)
- start = nextDigit;
- nextNonDigit = getFirstNonDigitPosition(input, nextDigit);
- int value = 0;
- for (int c = nextDigit; c < nextNonDigit; c++)
- value = (value * 10 + input[c]) - 48;
- if (value > 255 || nextNonDigit - nextDigit > 8)
- invalidFound = 0;
- else
- invalidFound++;
- if (invalidFound == 4) {
- for (int c = start; c < nextNonDigit; c++)
- input[c] = '*';
- invalidFound = 0;
- }
- }
- }
- private static int getFirstDigitPosition(char input[], int start) {
- for (int c = start; c < input.length && c >= 0; c++)
- if (input[c] >= '0' && input[c] <= '9')
- return c;
- return -1;
- }
- private static int getFirstNonDigitPosition(char input[], int start) {
- for (int c = start; c < input.length && c >= 0; c++)
- if (input[c] < '0' || input[c] > '9')
- return c;
- return input.length;
- }
- private static boolean isNotAlphanumeric(char c) {
- return !isLetter(c) && !isDigit(c);
- }
- private static boolean isDigitOrSymbol(char c) {
- return c < 'a' || c > 'z' || c == 'v' || c == 'x' || c == 'j'
- || c == 'q' || c == 'z';
- }
- private static boolean isLetter(char c) {
- return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
- }
- private static boolean isDigit(char c) {
- return c >= '0' && c <= '9';
- }
- private static boolean isLowerCaseLetter(char c) {
- return c >= 'a' && c <= 'z';
- }
- private static boolean isUpperCaseLetter(char c) {
- return c >= 'A' && c <= 'Z';
- }
- private static boolean charactersMatchFragment(char input[]) {
- boolean noLetters = true;
- for (int c = 0; c < input.length; c++)
- if (!isDigit(input[c]) && input[c] != 0)
- noLetters = false;
- if (noLetters)
- return true;
- int hashCode = hashCode(input);
- int fragmentId = 0;
- int fragmentCount = fragments.length - 1;
- if (hashCode == fragments[fragmentId]
- || hashCode == fragments[fragmentCount])
- return true;
- do {
- int id = (fragmentId + fragmentCount) / 2;
- if (hashCode == fragments[id])
- return true;
- if (hashCode < fragments[id])
- fragmentCount = id;
- else
- fragmentId = id;
- } while (fragmentId != fragmentCount && fragmentId + 1 != fragmentCount);
- return false;
- }
- private static int hashCode(char input[]) {
- if (input.length > 6)
- return 0;
- int code = 0;
- for (int pos = 0; pos < input.length; pos++) {
- char c = input[input.length - pos - 1];
- if (c >= 'a' && c <= 'z')
- code = code * 38 + ((c - 97) + 1);
- else if (c == '\'')
- code = code * 38 + 27;
- else if (c >= '0' && c <= '9')
- code = code * 38 + ((c - 48) + 28);
- else if (c != 0)
- return 0;
- }
- return code;
- }
- private static int[] fragments;
- private static char[][] badWords;
- private static byte[][][] badBytes;
- private static char[][] domains;
- private static char[][] topLevelDomains;
- private static int[] topLevelDomainType;
- private static final String[] exceptions = { "cook", "cook's", "cooks",
- "seeks", "sheet", "woop", "woops", "faq", "noob", "noobs" };
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement