Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env ocamlscript
- Ocaml.packs := [ "str" ; "lambdasoup" ; "markup" ]
- --
- open Soup;;
(* Str pattern matching URLs of the site whose pages should lose their
 * ".html" suffix. Group 1 captures everything that precedes the last
 * ".html" in the URL, so replacing a match by "\\1" drops that suffix.
 * Literal dots are escaped: an unescaped "." matches any character, which
 * made ".html" also match e.g. "xhtml" and let look-alike host names pass. *)
let website_specific =
  "\\(h*t*t*p*:*/*/*w*w*w*oclaunch\\.eu\\.org.*\\)\\.html";;
(* Rewrite example.com/page.html to example.com/page.
 * [link] is an element node; its "href" attribute, when present, is
 * replaced in place. Elements without an "href" are left untouched. *)
let remove_internal_html link =
  (* XXX Would be nicer with a map_attribute-like function and a regexp
   * in the href selector *)
  match attribute "href" link with
  | None -> ()
  | Some href_url ->
    (* When the pattern does not match, the URL is returned as-is *)
    let pattern = Str.regexp website_specific in
    let stripped_url = Str.replace_first pattern "\\1" href_url in
    set_attribute "href" stripped_url link
;;
(* Serialise [soup] to a string with Lambda Soup, then run that text
 * through Markup: parse it as HTML, apply the xhtml transformation and
 * write the resulting signals back out as an XML string. *)
let xml soup =
  let html_text = Soup.to_string soup in
  Markup.(
    string html_text
    |> parse_html
    |> signals
    |> xhtml
    |> write_xml
    |> to_string)
;;
(* Entry point: read an HTML document from stdin, strip the ".html" suffix
 * from the internal links, and print the document as XHTML on stdout. *)
let () =
  let soup = read_channel stdin |> parse in
  (* The rewrite was defined but never applied, so links were left
   * unchanged: run it on every anchor carrying an href before output. *)
  soup $$ "a[href]" |> iter remove_internal_html;
  soup |> xml |> write_channel stdout
;;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement