// Untitled

import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads a single line of HTML from standard input and prints its title and
 * the plain-text content of its body.
 *
 * <p>Output is {@code Title: <title>} followed by {@code Content: <text>}.
 * If the title cannot be found, {@code Title not matched!} is printed and the
 * program stops; if the body cannot be found, {@code Body not matched!} is
 * printed after the title line and the program stops.
 */
public class HTMLParser {

    /** Captures the text between {@code <title>} and the nearest {@code </title>}. */
    private static final Pattern TITLE = Pattern.compile("<title>(?<title>.+?)</title>");

    /** Captures everything between the first {@code <body>} and the last {@code </body>}. */
    private static final Pattern BODY = Pattern.compile("<body>(?<body>.+)</body>");

    /** Matches one tag; single- or double-quoted sections inside it may contain {@code >}. */
    private static final Pattern TAG = Pattern.compile("<(\"[^\"]*?\"|'[^']*?'|[^'\">])*>");

    /** Matches a run of one or more whitespace characters. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Program entry point.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // The scanner wraps System.in and is deliberately left open.
        Scanner scanner = new Scanner(System.in);
        // Empty input is treated as an empty line so it is reported as a
        // missing title instead of failing with NoSuchElementException.
        String line = scanner.hasNextLine() ? scanner.nextLine() : "";

        Matcher titleMatcher = TITLE.matcher(line);
        if (!titleMatcher.find()) {
            System.out.println("Title not matched!");
            return;
        }
        System.out.println("Title: " + titleMatcher.group("title"));

        Matcher bodyMatcher = BODY.matcher(line);
        if (!bodyMatcher.find()) {
            System.out.println("Body not matched!");
            return;
        }
        System.out.println("Content: " + toPlainText(bodyMatcher.group("body")));
    }

    /**
     * Reduces an HTML fragment to a single plain-text string.
     *
     * @param html the markup found between the body tags
     * @return the text with every tag replaced by a space, literal
     *         backslash-n sequences replaced by a space, whitespace runs
     *         collapsed to one space, and the ends trimmed
     */
    private static String toPlainText(String html) {
        // One regex pass over the fragment: each tag is replaced exactly where
        // it was matched, so the text of one tag can never alter another tag.
        String text = TAG.matcher(html).replaceAll(" ");
        // The input marks line breaks with the two characters '\' and 'n'.
        text = text.replace("\\n", " ");
        return WHITESPACE.matcher(text).replaceAll(" ").trim();
    }
}