Advertisement
Guest User

Untitled

a guest
Apr 26th, 2019
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.58 KB | None | 0 0
  1. open Microsoft.ML
  2. open Microsoft.ML
  3. open Microsoft.ML.Data
  4. open Microsoft.ML.Transforms.Text
  5.  
  /// Custom stop words handed to `RemoveStopWords` in `tokenizeLyrics`.
  /// Order and duplicates are kept exactly as originally listed.
  // NOTE(review): the "ä±" entry looks like a mis-encoded character — confirm
  // against the actual lyrics data before changing it.
  let stopwords =
      [|
          "ourselves"; "hers"; "between"; "yourself"; "but"; "again"; "there"; "about"
          "once"; "during"; "out"; "very"; "having"; "with"; "they"; "own"
          "an"; "be"; "some"; "for"; "do"; "its"; "yours"; "such"
          "into"; "of"; "most"; "itself"; "other"; "off"; "is"; "s"
          "am"; "or"; "who"; "as"; "from"; "him"; "each"; "the"
          "themselves"; "until"; "below"; "are"; "we"; "these"; "your"; "his"
          "through"; "don"; "nor"; "me"; "were"; "her"; "more"; "himself"
          "this"; "down"; "should"; "our"; "their"; "while"; "above"; "both"
          "up"; "to"; "ours"; "had"; "she"; "all"; "no"; "when"
          "at"; "any"; "before"; "them"; "same"; "and"; "been"; "have"
          "in"; "will"; "on"; "does"; "yourselves"; "then"; "that"; "because"
          "what"; "over"; "why"; "so"; "can"; "did"; "not"; "now"
          "under"; "he"; "you"; "herself"; "has"; "just"; "where"; "too"
          "only"; "myself"; "which"; "those"; "i"; "after"; "few"; "whom"
          "t"; "ll"; "being"; "if"; "theirs"; "my"; "against"; "a"
          "by"; "doing"; "it"; "how"; "further"; "was"; "here"; "than"
          "s"; "t"; "m"; "'re"; "'ll"; "ve"; "..."; "ä±"
          "''"; "``"; "--"; "'d"; "el"; "la"; "que"; "y"
          "de"; "en"
      |]

  /// Separator characters handed to `TokenizeWords` in `tokenizeLyrics`:
  /// apostrophe, space and comma.
  let symbols = [| '\''; ' '; ',' |]
  8.  
  /// Draws a line chart of the most frequent words in `words`.
  ///
  /// Occurrences of each distinct word are counted, the (word, count) pairs are
  /// ordered by descending count, and at most the first 15 pairs are passed to
  /// `Chart.Line`.
  let renderLineChartForWords(words: seq<string>) =
      words
      |> Seq.countBy id
      |> Seq.sortByDescending snd
      // Seq.truncate instead of Seq.take: Seq.take raises
      // InvalidOperationException when fewer than 15 distinct words exist,
      // whereas Seq.truncate simply yields what is available.
      |> Seq.truncate 15
      |> Chart.Line
  15.  
  /// Runs the lyrics through an ML.NET text pipeline and returns every remaining
  /// token as one flat list.
  ///
  /// The pipeline normalizes the "Lyrics" column, splits it on `symbols`,
  /// removes the custom `stopwords` and then the library's default stop words.
  /// The token arrays of all rows are concatenated in row order.
  let tokenizeLyrics (lyrics: seq<LyricsInput>) =
      let mlContext = MLContext(seed = Nullable 0)
      let lyricsView = mlContext.Data.LoadFromEnumerable lyrics
      let text = mlContext.Transforms.Text

      // NOTE(review): the "FeaturizedLyrics" column is not read afterwards in
      // this function; the estimator is kept so the chain is built exactly as before.
      let pipeline =
          text.FeaturizeText("FeaturizedLyrics", "Lyrics")
              .Append(text.NormalizeText("NormalizedLyrics", "Lyrics"))
              .Append(text.TokenizeWords("TokenizedLyric", "NormalizedLyrics", symbols))
              .Append(text.RemoveStopWords("LyricsWithNoCustomStopWords", "TokenizedLyric", stopwords))
              .Append(text.RemoveDefaultStopWords("LyricsWithNoStopWords", "LyricsWithNoCustomStopWords"))

      // Fit and apply on the same data view.
      let model = pipeline.Fit(lyricsView)
      let tokenized = model.Transform(lyricsView)

      tokenized.GetColumn<string[]>(mlContext, "LyricsWithNoStopWords")
      |> Seq.collect id
      |> List.ofSeq
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement