Advertisement
Guest User

Untitled

a guest
Jul 18th, 2019
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.10 KB | None | 0 0
  1. (require '[clojure.data.xml :as xml]
  2. '[clojure.string :as str]
  3. '[clojure.java.io :as io])
  4.  
  (defn ext-segs
    "Flattens the :content of every node in `segs` into one sequence and
    joins the pieces with a single space, returning one string.
    An empty `segs` yields the empty string."
    [segs]
    (str/join " "
              (for [seg   segs
                    piece (:content seg)]
                piece)))
  9.  
  (defn ext-tuv
    "Turns one `tuv` node into a two-element vector `[lang text]`.

    `lang` is the node's :xml/lang attribute converted with `keyword`
    (nil when the attribute is absent). `text` is the result of
    `ext-segs` applied to the node's children whose :tag is :seg."
    [tuv]
    (let [lang (keyword (get-in tuv [:attrs :xml/lang]))
          seg? (fn [node] (= :seg (:tag node)))
          text (ext-segs (filter seg? (:content tuv)))]
      [lang text]))
  19.  
  (defn ext-tu
    "Builds a map from language keyword to text for one `tu` node.

    Each child whose :tag is :tuv is converted by `ext-tuv` into a
    `[lang text]` pair; the pairs are flattened and handed to `array-map`,
    so entries appear in the order of the :tuv children."
    [tu]
    (let [pairs (for [child (:content tu)
                      :when (= :tuv (:tag child))]
                  (ext-tuv child))]
      (apply array-map (apply concat pairs))))
  26.  
  (defn extract
    "Parses a TMX document and returns a lazy sequence with one map per
    translation unit, as produced by `ext-tu`.

    `path` is passed unchanged to `xml/parse`; despite the name, the caller
    in this file hands it an already opened reader. Because the result is
    lazy, it must be consumed while that source is still open.

    The body is located by its :body tag rather than by position, so
    whitespace text nodes or extra elements before it in the root do not
    select the wrong node, and a document without a body yields an empty
    sequence instead of throwing on an out-of-range index."
    [path]
    (let [doc  (xml/parse path)
          ;; First child of the root element tagged :body, or nil if none.
          body (->> (:content doc)
                    (filter #(= (:tag %) :body))
                    first)]
      (->> (:content body)
           (filter #(= (:tag %) :tu))
           (map ext-tu))))
  33.  
  ;; Export script: reads each TMX file and writes its English and Thai
  ;; texts, one unit per line, to "open.en" and "open.th".
  ;; The writers are type-hinted so the .write calls resolve without reflection.
  (with-open [^java.io.Writer w-en (io/writer "open.en")
              ^java.io.Writer w-th (io/writer "open.th")]
    (doseq [path ["opensub/en-th.tmx"
                  "gnome/en-th.tmx"]]
      (with-open [r (io/reader path)]
        ;; doseq walks the lazy result for its side effects without building
        ;; a sequence of return values, and stays inside with-open so the
        ;; reader is still open while the units are consumed.
        (doseq [tu (extract r)]
          (prn tu)
          (let [{:keys [en th]} tu]
            ;; Skip a unit that lacks either language: writing only one side
            ;; would put the two output files out of line alignment, and
            ;; passing nil to .write would abort the whole export.
            (when (and en th)
              (.write w-en ^String en)
              (.write w-en "\n")
              (.write w-th ^String th)
              (.write w-th "\n")))))))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement