Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class flu_tweets:
    """Container for raw tweet lines and their sentiment labels.

    ``tweets`` holds one JSON-encoded tweet per entry, exactly as read from
    the input file (trailing newline included). ``labels`` holds the label
    strings, where ``labels[i]`` belongs to ``tweets[i]``. The label list
    may be shorter than the tweet list when trailing tweets have not been
    labelled yet.
    """

    def __init__(self):
        """Create an empty collection."""
        self.tweets = []
        self.labels = []

    def load(self, tweets_filename, labels_filename=""):
        """Append tweets, and optionally labels, read from text files.

        Args:
            tweets_filename: Path to a file with one JSON tweet per line.
                Blank lines are skipped.
            labels_filename: Path to a file with one label per line. When
                left empty, no labels are loaded.
        """
        with open(tweets_filename, 'r') as tweets_file:
            for tweet in tweets_file:
                if tweet != "\n":
                    self.tweets.append(tweet)

        # The default "" means "no label file"; trying to open it would
        # always fail, so it is skipped.
        if labels_filename:
            with open(labels_filename, 'r') as labels_file:
                for label in labels_file:
                    self.labels.append(label.strip("\n"))

    def label_tweets(self):
        """Interactively ask for a label for every tweet that lacks one.

        Prints the tweet's ``text`` field and keeps prompting until the
        answer is exactly ``pos`` or ``neg``. Tweets that already have a
        label are skipped so that ``labels`` stays aligned with ``tweets``.
        """
        import json

        # ``raw_input`` only exists on Python 2; ``input`` is its
        # Python 3 equivalent.
        try:
            read_line = raw_input
        except NameError:
            read_line = input

        for tweet in self.tweets[len(self.labels):]:
            print(json.loads(tweet)['text'])
            while True:
                answer = read_line("Label as pos or neg")
                if answer == "pos" or answer == "neg":
                    self.labels.append(answer)
                    break

    def count(self, label):
        """Return how many stored labels are equal to ``label``."""
        return self.labels.count(label)

    def trim(self, max_pos=10, max_neg=90):
        """Drop non-English tweets and cap the number of pos/neg tweets.

        A tweet is kept only if its ``lang`` field is ``'en'``; tweets
        without a ``lang`` field are dropped as well. Of the remaining
        tweets, only the first ``max_pos`` labelled ``pos`` and the first
        ``max_neg`` labelled ``neg`` are kept. Tweets with any other label,
        or with no label, are not capped. Each removed tweet takes its
        label with it. Labels that have no matching tweet are discarded.

        Args:
            max_pos: Maximum number of ``pos`` tweets to keep.
            max_neg: Maximum number of ``neg`` tweets to keep.
        """
        import json

        limits = {"pos": max_pos, "neg": max_neg}
        kept_counts = {"pos": 0, "neg": 0}
        kept_tweets = []
        kept_labels = []

        # Build fresh lists instead of deleting while iterating, which
        # skips elements and runs past the end of the shrinking list.
        for index, tweet in enumerate(self.tweets):
            has_label = index < len(self.labels)
            label = self.labels[index] if has_label else None

            if json.loads(tweet).get('lang') != 'en':
                continue

            if label in limits:
                if kept_counts[label] >= limits[label]:
                    continue
                kept_counts[label] += 1

            kept_tweets.append(tweet)
            if has_label:
                kept_labels.append(label)

        # Slice assignment keeps the mutation in place, so other
        # references to these lists see the trimmed content.
        self.tweets[:] = kept_tweets
        self.labels[:] = kept_labels

    def merge(self, another_flu_tweets):
        """Append the tweets and labels of ``another_flu_tweets``.

        The other collection is left unchanged.
        """
        self.tweets = self.tweets + another_flu_tweets.tweets
        self.labels = self.labels + another_flu_tweets.labels
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement