Advertisement
Guest User

Extract Hyperlinks

a guest
May 31st, 2018
229
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.24 KB | None | 0 0
  1. import java.util.ArrayList;
  2. import java.util.List;
  3. import java.util.Scanner;
  4. import java.util.regex.Matcher;
  5. import java.util.regex.Pattern;
  6.  
  /**
   * Reads HTML-like text from standard input and prints the simple anchor elements it contains.
   *
   * <p>Input is consumed line by line until a line equal to {@code END} is read or the input
   * ends. Every {@code <a ...>...</a>} element whose text contains the substring {@code href}
   * and which has no nested tags is printed on its own line, in order of appearance.
   *
   * <p>Created with IntelliJ IDEA. User: LAPD. Date: 28.5.2018. Time: 10:37.
   */
  public class _16ExtractHyperlinks {

      /** Input line that marks the end of the text to scan. */
      private static final String END_MARKER = "END";

      /**
       * Matches an opening {@code <a} tag followed by the shortest run of text up to a closing
       * {@code </a>}. The whitespace required right after {@code <a} keeps tags whose name only
       * starts with "a" ({@code <article>}, {@code <abbr>}, ...) from being taken for anchors.
       */
      private static final Pattern ANCHOR_PATTERN =
              Pattern.compile("<a\\s[\\s\\S]*?>[\\s\\S]*?</a>");

      /**
       * Program entry point: reads the text from {@code System.in} and prints each accepted
       * anchor element to {@code System.out}.
       *
       * @param args command-line arguments; not used
       */
      public static void main(String[] args) {
          // The scanner is not closed on purpose: closing it would also close System.in.
          Scanner console = new Scanner(System.in);

          for (String anchor : extractAnchors(readInput(console))) {
              System.out.println(anchor);
          }
      }

      /**
       * Collects the input lines that precede the {@code END} marker.
       *
       * @param console scanner to read lines from
       * @return the lines read, each followed by a newline character
       */
      private static String readInput(Scanner console) {
          StringBuilder text = new StringBuilder();

          // hasNextLine() guards against input that ends without the END marker.
          while (console.hasNextLine()) {
              String line = console.nextLine();
              if (END_MARKER.equals(line)) {
                  break;
              }
              // Keep the line break so tokens on adjacent lines are not glued together
              // (for example "<a" followed by "href=..." on the next line).
              text.append(line).append('\n');
          }

          return text.toString();
      }

      /**
       * Finds the anchor elements in the given text that should be reported.
       *
       * @param text text to scan
       * @return matched elements that contain {@code href} and exactly two {@code <} characters,
       *         in order of appearance
       */
      private static List<String> extractAnchors(CharSequence text) {
          List<String> anchors = new ArrayList<>();
          Matcher matcher = ANCHOR_PATTERN.matcher(text);

          while (matcher.find()) {
              String candidate = matcher.group();
              if (!candidate.contains("href")) {
                  continue;
              }

              // Splitting on "<" yields a leading empty piece plus one piece per "<", so three
              // pieces means only the opening and closing anchor tags are present.
              if (candidate.split("<").length == 3) {
                  anchors.add(candidate);
              }
          }

          return anchors;
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement