Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- (require '[clojure.data.xml :as xml]
- '[clojure.string :as str]
- '[clojure.java.io :as io])
(defn ext-segs
  "Concatenates the :content children of every element in `segs` into one
  flat sequence and returns them joined by a single space as one string.
  An empty `segs` yields the empty string."
  [segs]
  (str/join " " (mapcat :content segs)))
(defn ext-tuv
  "Turns one <tuv> element into a two-element vector [lang text].

  lang is the element's :xml/lang attribute converted to a keyword (nil when
  the attribute is absent); text is the result of `ext-segs` applied to the
  element's direct :seg children."
  [tuv]
  (let [lang     (keyword (get-in tuv [:attrs :xml/lang]))
        seg-node? (fn [child] (= :seg (:tag child)))
        segs     (filter seg-node? (:content tuv))]
    [lang (ext-segs segs)]))
(defn ext-tu
  "Turns one <tu> element into a map from language keyword to text.

  Each direct :tuv child is converted with `ext-tuv`; the resulting
  [lang text] pairs are flattened and handed to `array-map`, so the map keeps
  the order in which the variants appear in the element."
  [tu]
  (let [tuvs (filter (comp #{:tuv} :tag) (:content tu))]
    (apply array-map (mapcat ext-tuv tuvs))))
(defn extract
  "Parses a TMX document and returns a lazy sequence with one map per <tu>
  element found directly under the document's <body> (see `ext-tu` for the
  shape of each map).

  `path` is passed unchanged to `xml/parse`; despite its name, the caller in
  this file supplies an already opened reader rather than a file name.

  The <body> element is located by tag rather than by position, so
  whitespace or comment nodes between the root's children, or a missing
  <header>, no longer select the wrong node. A document without a <body>
  yields an empty sequence instead of throwing.

  The result is lazy: consume it before the underlying source is closed."
  [path]
  (let [root     (xml/parse path)
        ;; Keeps only the nodes carrying the given tag; text nodes have no
        ;; :tag and are dropped by the same test.
        with-tag (fn [tag nodes] (filter #(= (:tag %) tag) nodes))
        body     (first (with-tag :body (:content root)))]
    (map ext-tu (with-tag :tu (:content body)))))
;; Writes the English and Thai sides of every translation unit found in the
;; listed TMX files to open.en and open.th, one unit per line, so that line N
;; of one file corresponds to line N of the other.
(with-open [w-en (io/writer "open.en")
            w-th (io/writer "open.th")]
  (let [;; A line break inside a segment would shift every following line and
        ;; break the line-by-line correspondence between the two files.
        one-line (fn [s] (str/replace s #"\r\n|\r|\n" " "))]
    (doseq [path ["opensub/en-th.tmx"
                  "gnome/en-th.tmx"]]
      (with-open [r (io/reader path)]
        ;; doseq walks the lazy result for its side effects without keeping
        ;; the already processed units in memory, and finishes before the
        ;; reader is closed.
        (doseq [{:keys [en th] :as tu} (extract r)
                ;; Units lacking either language cannot be written as a pair;
                ;; skip them instead of passing nil to .write.
                :when (and en th)]
          (prn tu)
          (.write w-en ^String (one-line en))
          (.write w-en "\n")
          (.write w-th ^String (one-line th))
          (.write w-th "\n"))))))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement