Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // ATTEMPT #1
- public static String[] tokenize(String input) {
- input = input.trim();
- if (input.isEmpty()) return new String[0];
- TreeMap<Integer, String> tokenizedInputMap = new TreeMap<>();
- char activeDelimiter = ' ';
- boolean isSearchingForClosingDelimiter = false;
- int openingDelimiterIndex = 0;
- for (int i = 0; i < input.length(); i++) {
- char currentChar = input.substring(i, i + 1).charAt(0);
- if (isSearchingForClosingDelimiter) {
- int indexOfClosingDelimiter = input.indexOf(String.valueOf(activeDelimiter), openingDelimiterIndex + 1);
- if (indexOfClosingDelimiter != -1) {
- tokenizedInputMap.put(openingDelimiterIndex, input.substring(openingDelimiterIndex + 1, indexOfClosingDelimiter));
- i = indexOfClosingDelimiter + 1;
- } else {
- // unclosed string group
- }
- } else {
- if (validDelimiters.contains(currentChar)) {
- activeDelimiter = currentChar;
- isSearchingForClosingDelimiter = true;
- openingDelimiterIndex = i;
- } else if (currentChar == ' ') {
- tokenizedInputMap.put(openingDelimiterIndex, input.substring(openingDelimiterIndex, i));
- openingDelimiterIndex = i + 1;
- } else if (i == input.length() - 1) {
- tokenizedInputMap.put(openingDelimiterIndex, input.substring(openingDelimiterIndex, i + 1));
- }
- }
- }
- return tokenizedInputMap.values().toArray(new String[0]);
- }
- //==================================================================================================================
- // ATTEMPT #2
- public static String[] tokenize(String input) throws UnclosedDelimiterException {
- input = input.trim();
- if (input.isEmpty()) return new String[0];
- TreeMap<Integer, String> tokenizedInputMap = new TreeMap<>();
- int index = 0;
- CharacterOccurrence openingCO = new CharacterOccurrence(0, input.charAt(0));
- CharacterOccurrence closingCO = new CharacterOccurrence(0, input.charAt(0));
- TokenizerMode mode = TokenizerMode.getModeForCharacter(openingCO.getCharacter());
- if (mode == null) mode = TokenizerMode.SPACE_DELIMITING;
- interpreterLoop:
- while (index < input.length()) {
- switchBlock:
- switch (mode) {
- case SPACE_DELIMITING: {
- closingCO = checkForNextOccurrenceOf(input, index, mode.getValidDelimiters());
- if (closingCO != null) {
- tokenizedInputMap.put(openingCO.getIndex(), input.substring(openingCO.getIndex(), closingCO.getIndex()));
- break switchBlock;
- } else {
- tokenizedInputMap.put(openingCO.getIndex(), input.substring(openingCO.getIndex(), input.length()));
- break interpreterLoop;
- }
- }
- case QUOTE_DELIMITING: {
- closingCO = checkForNextOccurrenceOf(input, index + 1, openingCO.getCharacter());
- if (closingCO == null) throw new UnclosedDelimiterException(openingCO.getCharacter());
- else tokenizedInputMap.put(openingCO.getIndex(), input.substring(openingCO.getIndex() + 1, closingCO.getIndex()));
- break switchBlock;
- }
- }
- if (input.length() >= closingCO.getIndex() + 1) index = closingCO.getIndex() + 1;
- else break interpreterLoop;
- CharacterOccurrence currentCO;
- CharacterOccurrence precedingCO = new CharacterOccurrence(index - 1, input.charAt(index - 1));
- while (input.length() > index) {
- currentCO = new CharacterOccurrence(index, input.charAt(index));
- if (TokenizerMode.getModeForCharacter(currentCO.getCharacter()) == null) { // The currentCO is not a valid delimiter...
- openingCO = currentCO;
- mode = TokenizerMode.getModeForCharacter(openingCO.getCharacter());
- if (mode == null) mode = TokenizerMode.SPACE_DELIMITING;
- continue interpreterLoop;
- } else { // The current character is a valid delimiter...
- precedingCO = currentCO;
- index++;
- }
- }
- break interpreterLoop;
- }
- return tokenizedInputMap.values().toArray(new String[0]);
- }
- public static CharacterOccurrence checkForNextOccurrenceOf(String searchIn, int searchFrom, char... searchChars) {
- TreeMap<Integer, Character> occurrences = new TreeMap<>();
- for (char searchChar: searchChars) {
- int indexOfResult = searchIn.indexOf(String.valueOf(searchChar), searchFrom);
- if (indexOfResult != -1) occurrences.put(indexOfResult, searchChar);
- }
- Map.Entry<Integer, Character> firstResult = occurrences.firstEntry();
- if (firstResult == null) return null;
- else return new CharacterOccurrence(firstResult.getKey(), firstResult.getValue());
- }
- private static class CharacterOccurrence {
- int index;
- char character;
- CharacterOccurrence(int index, char character) {
- this.index = index;
- this.character = character;
- }
- int getIndex() {
- return index;
- }
- char getCharacter() {
- return character;
- }
- }
- private enum TokenizerMode {
- SPACE_DELIMITING (' '),
- QUOTE_DELIMITING ('\'', '"', '`');
- final char[] validDelimiters;
- static Map<Character, TokenizerMode> reverseLookup = new HashMap<>();
- static char[] allValidDelimiters;
- static {
- for (TokenizerMode mode: values()) {
- for (char c: mode.getValidDelimiters()) {
- reverseLookup.put(c, mode);
- }
- }
- }
- TokenizerMode(char... validDelimiters) {
- this.validDelimiters = validDelimiters;
- }
- static char[] getAllValidDelimiters() {
- if (allValidDelimiters == null) {
- allValidDelimiters = new char[reverseLookup.keySet().size()];
- for (int i = 0; i < allValidDelimiters.length; i++) {
- allValidDelimiters[i] = reverseLookup.keySet().toArray(new Character[0])[i];
- }
- }
- return allValidDelimiters;
- }
- boolean checkIfValidDelimiter(char character) {
- for (char c: validDelimiters) {
- if (c == character) return true;
- }
- return false;
- }
- char[] getValidDelimiters() {
- return validDelimiters;
- }
- static TokenizerMode getModeForCharacter(char character) {
- return reverseLookup.get(character);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement