Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- module EoTranslator.Translator
- open System
- open System.Net
- open System.IO
- open System.Text
- open System.Text.RegularExpressions
- open System.Threading
/// One dictionary entry, assembled by `formatOutput` from a pair of
/// consecutive tab-separated lines of the service response.
type result =
    {
        // line 1
        /// Column 0 of the first line, converted to an integer.
        id1 : int
        /// Column 1 of the first line.
        word : string
        /// Column 2 of the first line.
        structure : string
        /// Column 3 of the first line.
        baseword : string
        /// Column 4 of the first line.
        prioritato : string
        /// Column 5 of the first line; "" when the line has only five columns.
        partOfSpeech : string

        // line 2
        /// Column 0 of the second line, converted to an integer.
        id2 : int
        /// Column 1 of the second line.
        langCode : string
        /// Column 2 of the second line.
        lang : string
        /// Column 3 of the second line.
        definition : string
    }
/// Direction of a lookup performed by `search`.
type translation =
    /// Query with source language "eo" and target language "en".
    | FromEo
    /// Query with source language "en" and target language "eo".
    | ToEo
/// Parses the raw tab-separated text returned by the dictionary service.
///
/// Every line is trimmed and blank lines are dropped. If the first remaining
/// line contains no tab it is returned as the header; otherwise the header is
/// "" and that line is treated as data. The data lines are consumed two at a
/// time: the first line of a pair supplies id1, word, structure, baseword,
/// prioritato and (optionally) partOfSpeech; the second supplies id2,
/// langCode, lang and definition. A pair is dropped when its first line has
/// fewer than 5 columns or its second line has fewer than 4. A trailing
/// unpaired line is ignored.
///
/// Returns `(header, results)`. Null or empty input yields `("", [])`.
let formatOutput (s : string) =
    // An id that is not a valid integer becomes 0 instead of raising, so one
    // malformed line cannot abort parsing of the whole response.
    let parseId (text : string) =
        try
            int text
        with
        | :? FormatException
        | :? OverflowException -> 0

    // Returns the column at `index`, or `defaultValue` when the line is short.
    let columnOrDefault (columns : string[]) index defaultValue =
        if index < columns.Length then columns.[index] else defaultValue

    // Builds a record from a pair of lines; None when either line is too short.
    let tryParsePair (line1 : string) (line2 : string) =
        let parts1 = line1.Split('\t')
        let parts2 = line2.Split('\t')
        if parts1.Length >= 5 && parts2.Length >= 4 then
            Some
                { id1 = parseId parts1.[0]
                  word = parts1.[1]
                  structure = parts1.[2]
                  baseword = parts1.[3]
                  prioritato = parts1.[4]
                  // The sixth column is the only one the length check does not guarantee.
                  partOfSpeech = columnOrDefault parts1 5 ""
                  id2 = parseId parts2.[0]
                  langCode = parts2.[1]
                  lang = parts2.[2]
                  definition = parts2.[3] }
        else
            None

    // Tail-recursive walk over the data lines, two at a time. Results are
    // accumulated in reverse and put back in input order at the end.
    let rec parse acc remaining =
        match remaining with
        | line1 :: line2 :: rest ->
            let acc' =
                match tryParsePair line1 line2 with
                | Some entry -> entry :: acc
                | None -> acc
            parse acc' rest
        | _ -> List.rev acc

    // Splitting on CR and LF and discarding empty entries yields the same
    // lines as reading with StringReader.ReadLine and skipping blanks.
    // Note: Trim() also strips leading/trailing tabs, so empty columns at
    // either end of a line are lost before the line is split into columns.
    let lines =
        if isNull s then
            []
        else
            s.Split([| '\r'; '\n' |], StringSplitOptions.RemoveEmptyEntries)
            |> Array.map (fun line -> line.Trim())
            |> Array.filter (fun line -> line <> "")
            |> Array.toList

    match lines with
    | first :: rest when first.IndexOf('\t') < 0 -> first, parse [] rest
    | _ -> "", parse [] lines
/// Queries the lernu.net dictionary endpoint for `text` in the direction
/// given by `language` and parses the response with `formatOutput`.
///
/// Returns `(rawOutput, header, results)`: the response body decoded as
/// UTF-8, followed by the header line and the parsed entries.
/// Performs a blocking HTTP download; any exception raised by the download
/// propagates to the caller.
let search language text =
    // Source / target language codes sent as `delingvo` / `allingvo`.
    let sourceLang, targetLang =
        match language with
        | FromEo -> "eo", "en"
        | ToEo -> "en", "eo"

    // Fresh GUID sent as the `hazardo` parameter.
    // NOTE(review): presumably there to defeat caching - confirm.
    let nonce = Guid.NewGuid().ToString()

    // Strip punctuation from the query, then percent-encode it for the URL.
    let query = Uri.EscapeDataString(Regex.Replace(text, @"[\(\)!,.;()""_~]", ""))

    let requestUrl =
        sprintf
            @"http://en.lernu.net/cgi-bin/serchi.pl?modelo=%s&delingvo=%s&allingvo=%s&prioritato=0&starto=0&bobeloid=undefined&dishaki=true&hazardo=%s"
            query
            sourceLang
            targetLang
            nonce

    use webClient = new WebClient()
    // Download as bytes and decode explicitly as UTF-8.
    let rawOutput = Encoding.UTF8.GetString(webClient.DownloadData(Uri(requestUrl)))
    let header, results = formatOutput rawOutput
    rawOutput, header, results
Advertisement
Add Comment
Please, Sign In to add comment