Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.util.ArrayList;
- import java.util.List;
- import java.util.Scanner;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- /**
- * Created by IntelliJ IDEA.
- * User: LAPD
- * Date: 28.5.2018 г.
- * Time: 10:37 ч.
- */
/**
 * Reads HTML text from standard input and prints the {@code href} value of
 * every simple hyperlink found in it, one value per line.
 *
 * <p>Input is consumed line by line until a line equal to {@code END} is read
 * or the input is exhausted. A "simple" hyperlink is an {@code <a ...>...</a>}
 * element whose text contains exactly two {@code '<'} characters (the opening
 * and the closing tag); anchors that wrap other markup are skipped.
 */
public class _16ExtractHyperlinks {

    /** Input line that marks the end of the HTML text. */
    private static final String END_MARKER = "END";

    /**
     * Matches an anchor element from its opening tag to the nearest closing
     * tag. The whitespace required right after {@code <a} keeps other tags
     * that merely start with "a" ({@code <abbr>}, {@code <article>}, ...) from
     * being taken for the start of an anchor.
     */
    private static final Pattern ANCHOR_PATTERN =
            Pattern.compile("<a\\s[\\s\\S]*?>[\\s\\S]*?</a>");

    /**
     * Matches an {@code href} attribute; group 1 is the raw value, which may
     * be double-quoted, single-quoted or unquoted.
     */
    private static final Pattern HREF_PATTERN =
            Pattern.compile("href\\s*=\\s*(\".*?\"|'.*?'|.*?)[\\s>]");

    /**
     * Program entry point: reads the HTML from {@code System.in} and prints
     * each extracted URL on its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // The scanner is intentionally not closed: closing it would also
        // close System.in for the rest of the JVM.
        Scanner console = new Scanner(System.in);
        String html = readHtml(console);
        for (String url : extractUrls(html)) {
            System.out.println(url);
        }
    }

    /** Reads lines until the end marker or end of input and joins them with newlines. */
    private static String readHtml(Scanner console) {
        StringBuilder html = new StringBuilder();
        // hasNextLine() guards against input that ends without the END marker.
        while (console.hasNextLine()) {
            String line = console.nextLine();
            if (END_MARKER.equals(line)) {
                break;
            }
            // Keep the line break so that tokens on adjacent lines
            // (e.g. two attributes of one tag) do not fuse together.
            html.append(line).append('\n');
        }
        return html.toString();
    }

    /** Returns the href values of all simple anchors in the given HTML, in document order. */
    private static List<String> extractUrls(String html) {
        List<String> urls = new ArrayList<>();
        Matcher anchors = ANCHOR_PATTERN.matcher(html);
        while (anchors.find()) {
            String anchor = anchors.group();
            if (!isSimpleAnchor(anchor)) {
                continue;
            }
            Matcher href = HREF_PATTERN.matcher(anchor);
            if (href.find()) {
                urls.add(stripQuotes(href.group(1)));
            }
        }
        return urls;
    }

    /** Tells whether the anchor text contains only its own opening and closing tag. */
    private static boolean isSimpleAnchor(String anchor) {
        // Splitting "<a ...>text</a>" on '<' yields "", "a ...>text" and "/a>";
        // any additional '<' means nested markup.
        return anchor.split("<").length == 3;
    }

    /**
     * Removes one pair of matching surrounding quotes, if present. Values
     * that are unquoted or whose quotes are unbalanced are returned unchanged.
     */
    private static String stripQuotes(String value) {
        if (value.length() >= 2) {
            char first = value.charAt(0);
            char last = value.charAt(value.length() - 1);
            if ((first == '"' || first == '\'') && first == last) {
                return value.substring(1, value.length() - 1);
            }
        }
        return value;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement