Advertisement
Guest User

Untitled

a guest
Feb 6th, 2016
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.89 KB | None | 0 0
  1. #!/usr/bin/env ocamlscript
  2. Ocaml.packs := [ "str" ; "lambdasoup" ; "markup" ]
  3. --
  4. open Soup;;
  5.  
  (* Pattern (Str syntax) matching the internal addresses whose ".html" suffix
   * must be removed. Group 1 captures everything that precedes the suffix, so
   * replacing a match by group 1 drops the suffix.
   * Every dot is escaped so that it only matches a literal dot: left
   * unescaped, the final dot matched any character and an address such as
   * oclaunch.eu.org/xhtml lost its last five characters. *)
  let website_specific =
    "\\(h*t*t*p*:*/*/*w*w*w*oclaunch\\.eu\\.org.*\\)\\.html";;
  8.  
  (* Strip the ".html" suffix from the [href] attribute of [link] when it
   * matches [website_specific], e.g. oclaunch.eu.org/page.html becomes
   * oclaunch.eu.org/page. An element without [href] is left untouched, and an
   * [href] that does not match keeps its value. *)
  let remove_internal_html =
    (* Compile the pattern once, when the script starts, rather than once per
     * link. *)
    let internal_html_re = Str.regexp website_specific in
    fun link ->
      (* XXX Better with a function like map_attribute and a regexp in the
       * href selector *)
      match attribute "href" link with
      | None -> ()
      | Some href_url ->
        (* [replace_first] returns [href_url] as-is when nothing matches. *)
        let url_without_html =
          Str.replace_first internal_html_re "\\1" href_url
        in
        set_attribute "href" url_without_html link
  ;;
  25.  
  (* Render [soup] as an XHTML string: the document is printed as HTML,
   * parsed again by Markup, converted with [Markup.xhtml] and written out as
   * XML. *)
  let xml soup =
    let html_text = Soup.to_string soup in
    let xhtml_signals =
      Markup.string html_text
      |> Markup.parse_html
      |> Markup.signals
      |> Markup.xhtml
    in
    xhtml_signals |> Markup.write_xml |> Markup.to_string
  ;;
  31.  
  (* Entry point: read an HTML document from stdin, strip the ".html" suffix
   * from its internal links and print the result as XHTML on stdout. *)
  let () =
    let soup = read_channel stdin |> parse in
    (* Rewrite the links in place before serializing. Without this step
     * [remove_internal_html] was never applied and the output kept the
     * original addresses. *)
    soup $$ "a[href]" |> iter remove_internal_html;
    soup |> xml |> write_channel stdout
  ;;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement