Advertisement
dimipan80

Java Regex: Semantic HTML

Aug 4th, 2017
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 3.35 KB | None | 0 0
  1. /*
  2.  * You are given an HTML code, written in the old non-semantic style using tags like <div id="header">, <div class="section">, etc.
  3.  * Your task is to write a program that converts this HTML to semantic HTML by changing tags like <div id="header">
  4.      to their semantic equivalent like <header>.
  5.  * The non-semantic tags that should be converted are always <div>s and have either id or class with one of the following values:
  6.      "main", "header", "nav", "article", "section", "aside" or "footer".
  7.  * Their corresponding closing tags are always followed by a comment like <!-- header -->, <!-- nav -->, etc.
  8.      staying at the same line, after the tag.
  9.  * Each line from the input holds either an HTML opening tag or an HTML closing tag or HTML text content.
  10.  * There will be no tags that span several lines and no lines that hold multiple tags.
  11.  * Attribute values will always be enclosed in double quotes ".
  12.  * Tags will never have id and class at the same time.
  13.  * The HTML will be valid. Opening and closing tags will match correctly.
  14.  * Whitespace may occur between attribute names, values and around comments.
  15.  * The Input will be read from the console.
  16.  * It will contain a variable number of lines and will end with the keyword "END".
  17.  *  Output Format:
  18.  * The Output is the semantic version of the input HTML.
  19.  * In all converted tags you should replace multiple spaces (like <header      style="color:red">) with a single space
  20.  *   and remove excessive spaces at the end (like <footer      >).
  21.  */
  22.  
  23. import java.io.BufferedReader;
  24. import java.io.IOException;
  25. import java.io.InputStreamReader;
  26. import java.util.regex.Matcher;
  27. import java.util.regex.Pattern;
  28.  
  29. public class SemanticHTML {
  30.     public static void main(String[] args) throws IOException {
  31.         Pattern openTagPattern = Pattern
  32.                 .compile("(\\s*<)div\\s(.*?)\\s?((?:id|class)\\s*=\\s*\"(?<tag>[a-z]+)\")\\s?(.*?)\\s?>");
  33.  
  34.         Pattern closedTagPattern = Pattern.compile("(\\s*</)div>\\s+<!--\\s*(?<tag>[a-z]+)\\s*-->");
  35.         BufferedReader reader =
  36.                 new BufferedReader(new InputStreamReader(System.in));
  37.  
  38.         String inputLine = reader.readLine();
  39.         Matcher openMatch;
  40.         Matcher closedMatch;
  41.         while (!inputLine.equals("END")) {
  42.             openMatch = openTagPattern.matcher(inputLine);
  43.             closedMatch = closedTagPattern.matcher(inputLine);
  44.  
  45.             if (openMatch.find()) {
  46.                 System.out.println(convertToOpeningSemanticTag(openMatch));
  47.             } else if (closedMatch.find()) {
  48.                 System.out.printf("%s%s>\n", closedMatch.group(1), closedMatch.group("tag"));
  49.             } else {
  50.                 System.out.println(inputLine);
  51.             }
  52.  
  53.             inputLine = reader.readLine();
  54.         }
  55.     }
  56.  
  57.     private static String convertToOpeningSemanticTag(Matcher fullMatch) {
  58.         StringBuilder sb = new StringBuilder();
  59.         sb.append(fullMatch.group(1)).append(fullMatch.group("tag"));
  60.  
  61.         String firstAdd = fullMatch.group(2).trim().replaceAll("\\s+", " ");
  62.         String secondAdd = fullMatch.group(5).trim().replaceAll("\\s+", " ");
  63.  
  64.         if (firstAdd.length() > 0) {
  65.             sb.append(' ').append(firstAdd);
  66.         }
  67.  
  68.         if (secondAdd.length() > 0) {
  69.             sb.append(' ').append(secondAdd);
  70.         }
  71.  
  72.         return sb.append('>').toString();
  73.     }
  74. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement