# Untitled

import time

import tweepy
import json
import pandas as pd
import numpy as np
from IPython.display import display

# In this example, the handler is time.sleep(15 * 60),
# but you can of course handle it in any way you want.

def limit_handled(cursor):
    """Yield items from a tweepy cursor, pausing when the rate limit is hit.

    Args:
        cursor: Object exposing a ``next()`` method that returns the next
            item and raises ``StopIteration`` when exhausted (for example
            the iterator returned by ``tweepy.Cursor(...).pages()``).

    Yields:
        Each item produced by ``cursor.next()``.

    Behaviour on errors:
        * ``StopIteration``: the generator ends normally. It must be caught
          explicitly, because a ``StopIteration`` escaping a generator body
          is turned into ``RuntimeError`` (PEP 479).
        * ``tweepy.RateLimitError``: sleeps 15 minutes, then retries.
        * any other ``tweepy.TweepError``: writes the module-level ``df``
          accumulator to disk as a checkpoint and stops iterating, since
          retrying immediately with no back-off would spin forever on a
          persistent failure.
    """
    while True:
        try:
            item = cursor.next()
        except StopIteration:
            # Cursor exhausted: finish the generator cleanly.
            return
        except tweepy.RateLimitError:
            print("Waiting for rate limit.....")
            time.sleep(15 * 60)
            continue
        except tweepy.TweepError:
            print("Error no rate limit.....")
            # Save whatever has been collected so far before giving up.
            # ``df`` is the module-level accumulator owned by the script body.
            df.to_json('/media/Samsung/tweet_CNN.json', orient='records', lines=True)
            return
        # Yield outside the try so exceptions raised by the consumer are not
        # mistaken for API errors.
        yield item


def tweets_to_data_frame(tweets):
    """Build a DataFrame with one row per tweet.

    Args:
        tweets: Iterable of tweet objects exposing the attributes
            ``full_text``, ``id``, ``created_at``, ``source``,
            ``favorite_count`` and ``retweet_count``. One-shot iterables
            such as generators are accepted.

    Returns:
        pandas.DataFrame with the columns ``Tweets``, ``id``, ``len``,
        ``date``, ``source``, ``likes`` and ``retweets`` (in that order).
    """
    # Materialise once: the input is traversed once per column below, which
    # would silently exhaust a generator after the first pass.
    tweets = list(tweets)

    data = pd.DataFrame(data=[tweet.full_text for tweet in tweets], columns=['Tweets'])

    data['id'] = np.array([tweet.id for tweet in tweets])
    data['len'] = np.array([len(tweet.full_text) for tweet in tweets])
    data['date'] = np.array([tweet.created_at for tweet in tweets])
    data['source'] = np.array([tweet.source for tweet in tweets])
    data['likes'] = np.array([tweet.favorite_count for tweet in tweets])
    data['retweets'] = np.array([tweet.retweet_count for tweet in tweets])

    return data

if __name__ == '__main__':
    # Script entry point: download tweets matching a search query page by
    # page and write them to a JSON-lines file.

    # Local import: only the script body needs access to the environment.
    import os

    # Credentials are read from the environment so that API secrets are never
    # stored in source control. Application-only authentication uses just the
    # consumer key/secret pair.
    try:
        consumer_key = os.environ["TWITTER_CONSUMER_KEY"]
        consumer_secret = os.environ["TWITTER_CONSUMER_SECRET"]
    except KeyError as missing:
        raise SystemExit(
            "Missing required environment variable: {}".format(missing))

    # The search term you want to find.
    query = "@CNN"
    # Upper bound on the number of result pages to download.
    max_pages = 5000
    # Same path that limit_handled() uses for its error checkpoint.
    output_path = '/media/Samsung/tweet_CNN.json'

    # Module-level accumulator. limit_handled() reads this name to save a
    # checkpoint on API errors, so it is kept up to date after every page.
    df = pd.DataFrame()

    # APPLICATION-ONLY AUTHENTICATION.
    # Requests per window (app auth), per Twitter's rate-limit table:
    # https://developer.twitter.com/en/docs/basics/rate-limits
    # For search, the limit is 450 requests per 15-minute window.
    auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)

    # Creating the API object while passing in auth information.
    api = tweepy.API(auth, wait_on_rate_limit=True)

    # The cursor handles pagination itself, so no max_id is tracked by hand.
    pages = tweepy.Cursor(api.search, q=query, count=100, tweet_mode='extended').pages()

    for pag, results in enumerate(limit_handled(pages)):
        if pag >= max_pages:
            break
        print("Pagina" + str(pag))

        # Stop before concatenating: an empty page contributes no rows, and
        # its empty float columns may upcast the integer id column.
        if not results:
            print("There is nothing left to download ....")
            break

        dfReaded = tweets_to_data_frame(results)
        print(dfReaded)

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # equivalent that works across pandas versions.
        df = pd.concat([df, dfReaded], ignore_index=True)
        print("\nahora va el df ....")
        print(len(df))

    print("Tweets to write :")
    print(len(df.index))

    # Keep 'id' as a regular column: orient='records' omits the index, so
    # moving 'id' into the index would drop it from the output file.
    # to_json() returns None when given a path, so df is not rebound here.
    df.to_json(output_path, orient='records', lines=True)

    print(df)