Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2018
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.18 KB | None | 0 0
  1. #import regex
  2. import re
  3. from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
  4. import pandas as pd
  5. import csv
  6.  
  7. factory = StemmerFactory()
  8. stemmer = factory.create_stemmer()
  9.  
  10. #start process_tweet
  11. def processTweet(tweet):
  12. # process the tweets
  13.  
  14. #Convert to lower case
  15. tweet = tweet.lower()
  16. #Convert www.* or https?://* to URL
  17. tweet = re.sub("((www\.[^\s]+)|(https?://[^\s]+))","URL",tweet)
  18. #Convert @username to AT_USER
  19. tweet = re.sub("@[^\s]+","AT_USER",tweet)
  20. #Remove additional white spaces
  21. tweet = re.sub("[\s]+", " ", tweet)
  22. #Replace #word with word
  23. tweet = re.sub(r"#([^\s]+)", r"\1", tweet)
  24. #trim
  25. tweet = tweet.strip('\'"')
  26. #stemming
  27. tweet = stemmer.stem(tweet)
  28. return tweet
  29. #end
  30.  
  31. #Read the tweets one by one and process it
  32. fp = open("ambil.csv", "r+", encoding="ascii", errors="ignore")
  33. line = fp.readline()
  34. processedTweets = []
  35. with open("hasilstemming.csv", "w") as csvFile:
  36. writer = csv.writer(csvFile)
  37. writer.writerows(processedTweets)
  38. csvFile.close()
  39. while line:
  40. processedTweet = processTweet(line)
  41. print (processedTweet)
  42. line = fp.readline()
  43. #end loop
  44. fp.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement