Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- open Microsoft.ML
- open Microsoft.ML
- open Microsoft.ML.Data
- open Microsoft.ML.Transforms.Text
/// Custom stop-word list passed to `RemoveStopWords` in `tokenizeLyrics`.
///
/// Entries are kept exactly as authored, in their original order, including
/// the repeated "s" and "t" items.
// NOTE(review): "ä±" looks like a mis-encoded character (possibly a dotless
// "ı") — confirm the intended token before changing it.
let stopwords =
    [| "ourselves"; "hers"; "between"; "yourself"; "but"; "again"; "there"; "about"; "once"; "during"
       "out"; "very"; "having"; "with"; "they"; "own"; "an"; "be"; "some"; "for"
       "do"; "its"; "yours"; "such"; "into"; "of"; "most"; "itself"; "other"; "off"
       "is"; "s"; "am"; "or"; "who"; "as"; "from"; "him"; "each"; "the"
       "themselves"; "until"; "below"; "are"; "we"; "these"; "your"; "his"; "through"; "don"
       "nor"; "me"; "were"; "her"; "more"; "himself"; "this"; "down"; "should"; "our"
       "their"; "while"; "above"; "both"; "up"; "to"; "ours"; "had"; "she"; "all"
       "no"; "when"; "at"; "any"; "before"; "them"; "same"; "and"; "been"; "have"
       "in"; "will"; "on"; "does"; "yourselves"; "then"; "that"; "because"; "what"; "over"
       "why"; "so"; "can"; "did"; "not"; "now"; "under"; "he"; "you"; "herself"
       "has"; "just"; "where"; "too"; "only"; "myself"; "which"; "those"; "i"; "after"
       "few"; "whom"; "t"; "ll"; "being"; "if"; "theirs"; "my"; "against"; "a"
       "by"; "doing"; "it"; "how"; "further"; "was"; "here"; "than"; "s"; "t"
       "m"; "'re"; "'ll"; "ve"; "..."; "ä±"; "''"; "``"; "--"; "'d"
       "el"; "la"; "que"; "y"; "de"; "en" |]
/// Separator characters passed to `TokenizeWords` in `tokenizeLyrics`:
/// apostrophe, space and comma.
let symbols =
    [| '\''
       ' '
       ',' |]
/// Plots the most frequent words as a line chart.
///
/// Counts how often each distinct string occurs in `words`, orders the
/// resulting (word, count) pairs by descending count, and hands at most the
/// first 15 pairs to `Chart.Line`.
///
/// words: the word tokens to tally; may contain fewer than 15 distinct values.
/// Returns whatever `Chart.Line` returns for the selected pairs.
let renderLineChartForWords (words: seq<string>) =
    words
    |> Seq.countBy id
    |> Seq.sortByDescending snd
    // Seq.truncate yields "up to 15" items; Seq.take would raise
    // InvalidOperationException when fewer than 15 distinct words exist.
    |> Seq.truncate 15
    |> Chart.Line
/// Converts a collection of lyrics into one flat list of word tokens with
/// stop words removed.
///
/// The "Lyrics" column is normalized, split on the `symbols` separators,
/// filtered against the custom `stopwords` array and then passed through
/// `RemoveDefaultStopWords`. The token arrays of the resulting
/// "LyricsWithNoStopWords" column are concatenated into a single list.
///
/// lyrics: the input rows; each is expected to expose a "Lyrics" column.
let tokenizeLyrics (lyrics: seq<LyricsInput>) =
    // Fixed seed of 0 on the ML.NET context.
    let ctx = MLContext(seed = Nullable 0)
    let text = ctx.Transforms.Text
    let input = ctx.Data.LoadFromEnumerable lyrics

    // NOTE(review): the "FeaturizedLyrics" column is not read anywhere in
    // this function — confirm whether this step is still needed.
    let featurize = text.FeaturizeText("FeaturizedLyrics", "Lyrics")
    let normalize = text.NormalizeText("NormalizedLyrics", "Lyrics")
    let tokenize = text.TokenizeWords("TokenizedLyric", "NormalizedLyrics", symbols)
    let dropCustom = text.RemoveStopWords("LyricsWithNoCustomStopWords", "TokenizedLyric", stopwords)
    let dropDefault = text.RemoveDefaultStopWords("LyricsWithNoStopWords", "LyricsWithNoCustomStopWords")

    let pipeline =
        featurize
            .Append(normalize)
            .Append(tokenize)
            .Append(dropCustom)
            .Append(dropDefault)

    // The pipeline is fitted on, and then applied to, the same data view.
    let model = pipeline.Fit(input)
    let transformed = model.Transform(input)

    transformed.GetColumn<string[]>(ctx, "LyricsWithNoStopWords")
    |> Seq.collect id
    |> List.ofSeq
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement