Advertisement
ptrelford

NDC Oslo twitter word cloud

Jun 21st, 2015
350
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
F# 2.84 KB | None | 0 0
  1. #I @"packages\FSharp.Data.Toolbox.Twitter.0.6\lib\net40"
  2. #I @"packages\FSharp.Data.2.1.1\lib\net40"
  3.  
  4. #r @".\packages\FSharp.Data.2.1.1\lib\net40\FSharp.Data.dll"
  5. #r @".\packages\FSharp.Data.Toolbox.Twitter.0.6\lib\net40\FSharp.Data.Toolbox.Twitter.dll"
  6.  
  7. open FSharp.Data.Toolbox.Twitter
  8.  
  9. let key = "mKQL29XNemjQbLlQ8t0pBg" // first argument to Twitter.AuthenticateAppOnly (presumably the application's consumer key - confirm)
  10. let secret = "T27HLDve1lumQykBUgYAbcEkbDrjBe6gwbu0gqi4saM" // NOTE(review): credential stored in the script itself; consider supplying it from outside the source
  11. let twitter = Twitter.AuthenticateAppOnly(key, secret) // authenticated handle; getWords issues its searches through twitter.Search.Tweets
  12.  
  13. open System
  14. open System.IO
  15.  
  16. let stopWords = File.ReadAllLines(Path.Combine(__SOURCE_DIRECTORY__, "StopWords.txt")) // words to drop, one per line of the file next to this script; Path.Combine keeps the path valid where '\' is not the separator
  17.  
  18. let words (text:string) = text.Split("#@.,;:!?` \r\n\"'“”()+-".ToCharArray(), StringSplitOptions.RemoveEmptyEntries) // tokenises on each listed punctuation/whitespace character; empty tokens are dropped
  19.  
  20. /// Gathers the cleaned, lower-cased words of "#ndcoslo" tweets one page (up to 100 tweets) at a time.
  21. /// `id` is the paging cursor: None requests the first page, Some n is sent to the search as max_id.
  22. /// Recurses until a page is empty or its last tweet is dated before 15 June 2015.
  23. let rec getWords id =
  24.    let ndc =
  25.       match id with
  26.       // max_id asks for tweets at or below the cursor (older ones); the previous since_id
  27.       // asked for newer ones, so every call fetched the same recent tweets again
  28.       | Some id -> twitter.Search.Tweets("#ndcoslo",count=100,maxId=id)
  29.       | None -> twitter.Search.Tweets("#ndcoslo",count=100)
  30.    let statuses = ndc.Statuses
  31.    // Stop words plus the hashtag itself and retweet/URL/contraction fragments, hashed once
  32.    // per page so each word costs one lookup instead of a scan of the stop-word array
  33.    let extra = ["ndcoslo";"rt";"ht";"co";"cc";"yo";"bit";"didn";"dont";"isn";"htt";"&amp"]
  34.    let ignored = System.Collections.Generic.HashSet<string>(Seq.append stopWords extra)
  35.    // True for tokens that must not reach the result
  36.    let isNoise (word:string) =
  37.       word.Length <= 2
  38.       || word.StartsWith("http") || word.StartsWith("co/") || word.StartsWith("//t")
  39.       || word.EndsWith("…") || word.EndsWith("%")
  40.       || fst (Int32.TryParse(word)) // whole token is an integer
  41.       || ignored.Contains word
  42.    let words =
  43.       statuses
  44.       |> Seq.map (fun tweet -> tweet.Text)
  45.       |> String.concat " "
  46.       |> words
  47.       |> Seq.map (fun s -> s.ToLower())
  48.       |> Seq.filter (isNoise >> not)
  49.       |> Seq.toList
  50.    if Seq.isEmpty statuses then []
  51.    else
  52.       let last = statuses |> Seq.last
  53.       let s = last.CreatedAt
  54.       // "Fri Jun 19 13:11:05 +0000 2015" -> "Jun 19 2015"
  55.       let date = s.[4..10] + s.[26..]
  56.       printfn "%A" date
  57.       // Invariant culture: the English month name must parse whatever the machine's locale is
  58.       let day = DateTime.Parse(date, System.Globalization.CultureInfo.InvariantCulture)
  59.       // A page whose last tweet predates the cut-off is discarded whole, as it was before.
  60.       // The next cursor is last.Id - 1L because max_id is inclusive.
  61.       if day >= DateTime(2015,6,15) then words @ getWords (Some (last.Id - 1L))
  62.       else []
  63.  
  64. let allWords = None |> getWords // every retained word, starting from the first page of search results
  65. let ndcoslo = String.concat " " allWords // the words joined by single spaces
  66. File.WriteAllText(path = @"C:\temp\ndcoslo.txt", contents = ndcoslo) // saves the joined text to a fixed location on disk
  67.  
  68. let myCounts = // (word, occurrences) pairs for the most frequent words, highest count first
  69.    allWords
  70.    |> Seq.countBy id
  71.    |> Seq.sortBy (fun (_, count) -> -count)
  72.    |> Seq.truncate 50 // at most 50; Seq.take would throw when fewer distinct words exist
  73.    |> Seq.toArray
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement