Advertisement
Guest User

what's on sale?

a guest
Jun 23rd, 2015
594
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.21 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import time
  5. import atom.data
  6. import gdata.sites.client
  7. import gdata.sites.data
  8. import gdata.gauth
  9.  
  10. """Fetch latest deals from Google search, and update Google Sites with them"""
  11.  
  12. import urllib2, re
  13. from time import sleep
  14. from pprint import pprint
  15. from datetime import datetime, date, timedelta
  16. from apiclient.discovery import build
  17.  
  18. def weekly_special(market, wednesday):
  19.     day = wednesday.strftime("%d").lstrip("0")
  20.     month = wednesday.strftime("%m").lstrip("0")
  21.     # because tightarse on ozbargains like it this way
  22.     hit = google_search("intitle:%s/%s" % (day, month) + " intitle:" + market)
  23.     # pprint(hit)
  24.     if not hit:
  25.         print("Google search didn't hit.")
  26.         return
  27.     url = 'https://www.ozbargain.com.au/node/198392'
  28.     url = hit['formattedUrl']
  29.     response = urllib2.urlopen(url)
  30.     html = response.read()
  31.     match = re.search('<div itemprop="description"[^>]*>(.*?)</div>', html, re.S)
  32.     if not match:
  33.         print("Couldn't find description")
  34.     else:
  35.         return match.group(1).replace(
  36.         'href="/', 'href="http://ozbargain.com.au/'
  37.         ) + '''<p>This information is aggregated from
  38.        <a href="%s">ozbargain</a>.
  39.        </p>''' % url
  40.  
  41. def google_search(query):
  42.     # query = query + " tightarse"
  43.     # Build a service object for interacting with the API. Visit
  44.     # the Google APIs Console <http://code.google.com/apis/console>
  45.     # to get an API key for your own application.
  46.     # this developerKey and CX belongs to [email protected]
  47.  
  48.     service = build("customsearch", "v1",
  49.               developerKey="AIzaSyBB_WSkYTglFeKA0RhnDnvmdF2gh79F2lE")
  50.     i = 1
  51.     print("Searching '%s'" % query)
  52.     while (True):
  53.         res = service.cse().list(
  54.             q=query,
  55.             cx='015457467921696338036:jni2mvdmu78',
  56.             start=i,
  57.           ).execute()
  58.         if res['searchInformation']['totalResults'] == '0':
  59.            return False
  60.         else:
  61.            for item in res['items']:
  62.                print(item['formattedUrl'])
  63.                print("\t" + item['htmlTitle'])
  64.            return res['items'][0]
  65.  
  66. # list sites under this account - not for production but for debugging
  67. def print_list_sites(client):
  68.    feed = client.GetSiteFeed()
  69.    print 'Google Sites associated with your account: '
  70.    counter = 0
  71.    for entry in feed.entry:
  72.      print '       %i   %s (%s)' % (counter,entry.title.text, entry.site_name.text)
  73.      counter = counter + 1
  74.    print ' --- The End ---'
  75.  
  76. def get_webpages(client):
  77.    kind = 'webpage'
  78.    print 'Fetching only %s entries' % kind
  79.    uri = '%s?kind=%s' % (client.MakeContentFeedUri(), kind)
  80.    feed = client.GetContentFeed(uri=uri)
  81.  
  82.    print "Fetching content feed of '%s'...\n" % client.site
  83.    feed = client.GetContentFeed()
  84.  
  85.    uri = '%s?kind=%s' % (client.MakeContentFeedUri(),'webpage')
  86.    feed = client.GetContentFeed(uri=uri)
  87.    return {entry.title.text: entry for entry in feed.entry}
  88.    #return feed.entry[0]
  89.  
  90. def copy_search_result_to(page):
  91.    since_wednesday = timedelta(days = ((date.today().weekday() - 2)%7 ))
  92.    formatter = '<html:div xmlns:html="http://www.w3.org/1999/xhtml">%s</html:div>'
  93.    html = weekly_special(page.title.text, date.today() - since_wednesday)
  94.    print(html)
  95.    page.content.html = (formatter % html).encode('utf8')
  96.    print("Page %s content obtained" % page.title.text)
  97.  
  98. def test(client):
  99.    pprint(get_webpages(client))
  100.  
  101. def authorize_google_sites_client(client_id, client_secret, client):
  102.    # obtain oauth2 token
  103.    token_cache_path=os.environ['HOME']+'/.gdata-storage'
  104.    print "Token Cache: %s" % token_cache_path
  105.    try:
  106.       with open(token_cache_path, 'r') as f:
  107.          saved_blob_string=f.read()
  108.          if saved_blob_string is not None:
  109.             token = gdata.gauth.token_from_blob(saved_blob_string)
  110.          else:
  111.             token = None
  112.    except IOError:
  113.       token = None
  114.  
  115.    if token == None :
  116.       print "Getting a new token."
  117.       token = gdata.gauth.OAuth2Token( client_id=client_id,
  118.                                        client_secret=client_secret,
  119.                                        scope='https://sites.google.com/feeds/',
  120.                                        user_agent='whatson-sale-updater')
  121.       url = token.generate_authorize_url(redirect_uri='urn:ietf:wg:oauth:2.0:oob')
  122.       print 'Please go to the URL below and authorize this '
  123.       print 'application, then enter the code it gives you.'
  124.       print '   %s' % url
  125.       code = raw_input("Code: ")
  126.       token.get_access_token(code)
  127.       token.authorize(client)
  128.       saved_blob_string = gdata.gauth.token_to_blob(token)
  129.       f=open (token_cache_path, 'w')
  130.       f.write(saved_blob_string)
  131.    else:
  132.       print "Using a cached token from %s" % token_cache_path
  133.       token.authorize(client)
  134.  
  135.    f.close()
  136.  
# --- script entry point ---------------------------------------------------
# Placeholder OAuth2 credentials and site name; replace before running.
client_id = 'your_id'
client_secret = 'your_secret'
client = gdata.sites.client.SitesClient(source='whatson-sale-updater', site='yoursite')
# Interactive on first run (prompts for an OAuth code), then cached.
authorize_google_sites_client(client_id, client_secret, client)
# Refresh the Woolworths and Coles pages with this week's catalogue specials.
# KeyError here means the site has no page with that exact title.
pages = get_webpages(client)
copy_search_result_to(pages['Woolworths'])
client.Update(pages['Woolworths'])
copy_search_result_to(pages['Coles'])
client.Update(pages['Coles'])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement