Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# --- standard library ---
import os
import shutil
import zipfile
from traceback import format_exc

# --- third-party ---
import unicodecsv as csv
from django.core.files import File
from dropbox import client
from gevent.pool import Pool

# --- project ---
from spider.models import Run, Dataset

# Shared gevent pool (created with size 500) used to fan out the per-record
# CSV writes.
pool = Pool(500)

# Dropbox credentials.  The value below is a placeholder and must be replaced
# with a real API token before the script can upload anything.
access_token = 'your dropbox api key'

# NOTE: this rebinds the name `client` from the imported `dropbox.client`
# module to a DropboxClient instance; every later use of `client` in this file
# refers to the instance.
client = client.DropboxClient(access_token)
def data_change(data, output_dir="../665_hotels_com/"):
    """Append one scraped hotel record to the CSV file of its state.

    Records whose ``hotel_url`` is ``None`` are ignored.  For every other
    record the ``state`` and ``id`` keys are removed from ``data`` (the dict
    is modified in place), ``required_at_checkin`` is flattened into a
    comma-separated string, and the remaining fields are appended as one row
    to ``<output_dir>/<state>.csv`` (state lower-cased, spaces replaced by
    underscores).

    Any failure while formatting or writing the row is reported on stdout
    (record, state and traceback) and is not re-raised, so a single bad
    record does not abort a bulk run.

    Args:
        data: Mapping holding ``hotel_url``, ``state``, ``id`` and the columns
            listed in ``csvfieldnames`` below.
        output_dir: Directory that holds the per-state CSV files.  Defaults
            to the path that was previously hard-coded.

    Returns:
        None.

    Raises:
        KeyError: If ``hotel_url``, ``state`` or ``id`` is missing; these
            lookups happen outside the guarded section.
    """
    if data['hotel_url'] is None:
        return

    state = data['state']
    del data['state']
    del data['id']
    try:
        requirements = data['required_at_checkin']
        # '%s' formatting instead of str(): on Python 2 str() raises
        # UnicodeEncodeError for non-ASCII unicode items, which used to make
        # the whole row get dropped.
        data['required_at_checkin'] = (
            ','.join(['%s' % (item,) for item in requirements])
            if requirements else ''
        )
        csvfieldnames = ['hotel_name', 'hotel_address', 'required_at_checkin',
                         'checkin_age', 'policies', 'hotel_url']
        filename = state.lower().replace(' ', '_') + ".csv"
        # `with` guarantees the handle is closed even when writerow() raises.
        with open(os.path.join(output_dir, filename), 'a') as writefile:
            mycsv = csv.DictWriter(writefile, fieldnames=csvfieldnames)
            mycsv.writerow(data)
    except Exception:
        # Deliberately broad: report and carry on.  `Exception` (rather than
        # a bare except) lets KeyboardInterrupt/SystemExit propagate.
        print("**************************")
        trace = format_exc()
        print(data)
        print("--> %s" % (state,))
        print(trace)
def zipdir(source, destination):
    """Zip the files under ``source`` into a new archive at ``destination``.

    The directory tree is walked recursively and every file is stored,
    deflate-compressed, under its path relative to ``source``.  Files whose
    name ends in ``.pyc`` are left out, and so is the archive itself when
    ``destination`` happens to live inside ``source``.

    Args:
        source: Directory whose contents are archived.
        destination: Path of the zip file to create (overwritten if present).

    Returns:
        None.
    """
    folder = os.path.abspath(source)
    archive_path = os.path.normcase(os.path.abspath(destination))
    with zipfile.ZipFile(destination, 'w') as zipf:
        for root, _dirs, files in os.walk(folder):
            rel_root = os.path.relpath(root, folder)
            for filename in files:
                if filename.endswith(".pyc"):
                    continue
                absname = os.path.join(root, filename)
                # The archive already exists on disk while we walk; without
                # this guard it would be packed (half-written) into itself.
                if os.path.normcase(absname) == archive_path:
                    continue
                relname = os.path.join(rel_root, filename)
                zipf.write(absname, relname, zipfile.ZIP_DEFLATED)
# States (plus DC) for which a per-state CSV file is prepared.
_US_STATE_NAMES = (
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
    'Connecticut', 'Delaware', 'District of Columbia', 'Florida', 'Georgia',
    'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky',
    'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan',
    'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
    'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
    'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
    'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
    'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
    'West Virginia', 'Wisconsin', 'Wyoming',
)

# Column order of every per-state CSV file.
_HOTEL_CSV_FIELDS = ['hotel_name', 'hotel_address', 'required_at_checkin',
                     'checkin_age', 'policies', 'hotel_url']


def _state_csv_name(state_name):
    """Return the CSV file name for ``state_name`` (e.g. ``new_york.csv``)."""
    return state_name.replace(' ', '_').lower() + ".csv"


def _ensure_dropbox_folder(remote_path):
    """Try to create ``remote_path`` on Dropbox; report but tolerate failure."""
    try:
        client.file_create_folder(remote_path)
    except Exception as exc:
        # Best effort, as before (presumably the folder already exists), but
        # the failure is now visible and KeyboardInterrupt is not swallowed.
        print("could not create Dropbox folder %s: %s" % (remote_path, exc))


def _has_data_rows(csv_path):
    """Return True if the CSV at ``csv_path`` has a row after the header."""
    with open(csv_path, 'r') as handle:
        # Reading the first row is enough; no need to load the whole file.
        return next(csv.DictReader(handle), None) is not None


def _upload(local_path, remote_path):
    """Upload ``local_path`` to Dropbox at ``remote_path``, then close it."""
    # Binary mode: the payload (notably the zip archive) must be sent as-is.
    with open(local_path, 'rb') as handle:
        client.put_file(remote_path, File(handle))


def process(run_id):
    """Export the hotel dataset of a run to per-state CSVs and upload them.

    Steps, in order:
      1. Look up the ``Run`` with id ``run_id`` and its ``hotel`` dataset.
      2. Create ``../<spider name>/`` if needed and write a header-only CSV
         for every state into it.
      3. Feed every dataset record to ``data_change`` through the gevent pool.
      4. Create ``/nest/``, ``/nest/<spider>/`` and ``/nest/<spider>/<run id>/``
         on Dropbox (best effort).
      5. Upload each state CSV that has data rows to the run folder and
         delete the ones that only contain the header.
      6. Zip the local directory, upload the archive to ``/nest/<spider>/``,
         then delete the archive and the local directory.

    Args:
        run_id: Primary key of the ``Run`` to export.

    Returns:
        None.  Exceptions from the model lookups, file operations and
        uploads are not caught here and propagate to the caller.
    """
    run = Run.objects.get(id=run_id)
    dataset = Dataset.objects.get(run=run, item__name='hotel')

    spider_name = str(run.spider.name)
    local_dir = '../%s' % spider_name
    remote_spider_dir = '/nest/%s' % spider_name
    remote_run_dir = '%s/%s' % (remote_spider_dir, str(run.id))

    if not os.path.exists(local_dir):
        os.mkdir(local_dir)

    # Start every state file from scratch with just the header row.
    for state_name in _US_STATE_NAMES:
        header_path = '%s/%s' % (local_dir, _state_csv_name(state_name))
        with open(header_path, 'w') as newfile:
            csv.DictWriter(newfile, fieldnames=_HOTEL_CSV_FIELDS).writeheader()

    # NOTE(review): data_change appends to '../665_hotels_com/', which is the
    # same directory as local_dir only when the spider is named
    # '665_hotels_com' -- confirm before using this with another spider.
    pool.map(data_change, dataset.scan())

    for remote_dir in ('/nest/', remote_spider_dir, remote_run_dir):
        _ensure_dropbox_folder(remote_dir)

    for state_name in _US_STATE_NAMES:
        filename = _state_csv_name(state_name)
        local_path = '%s/%s' % (local_dir, filename)
        print(filename)
        if _has_data_rows(local_path):
            print("ok --> %s" % filename)
            _upload(local_path, "%s/%s" % (remote_run_dir, filename))
        else:
            os.remove(local_path)
            print("file removed from %s" % local_path)

    # zipdir creates the archive itself; opening it here beforehand (as the
    # code used to) only leaked a second handle on the same path.
    zipfilename = spider_name + ".zip"
    zipdir(local_dir + '/', zipfilename)
    _upload(zipfilename, "%s/%s" % (remote_spider_dir, zipfilename))
    os.remove(zipfilename)
    shutil.rmtree(local_dir)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement