Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Merge scraped TripAdvisor hotel CSV exports into one dated CSV.

Every file matching the Downloads glob below is read twice:

1. The first pass writes each row whose ``bookDirect`` column is ``YES``,
   once per distinct ``(hotelName, bookDirect, city)`` combination.
2. The second pass writes one row for every hotel name that has not been
   written yet.

The number of rows written by each pass is printed at the end.

NOTE(review): the pasted source had lost its indentation, so the nesting
below is reconstructed -- confirm that ``writerow`` is meant to run only
for rows that pass the de-duplication test.
"""
import glob
from datetime import datetime

import unicodecsv as csv

date = datetime.today().strftime("%Y%m%d")
files = glob.glob("/home/sharat/Downloads/601_rory_tripadvisor_hotel*.csv")
print(files)

# Column order of the merged output file.
keys = [
    "hotelName",
    "partnerName",
    "bookDirect",
    "bookingSite1",
    "bookingSite2",
    "bookingSite3",
    "bookingSites",
    "noOfReviews",
    "starReviews",
    "checkIn",
    "checkOut",
    "city",
    "hotelPage",
]


def _read_rows(path):
    """Yield each row of the CSV at *path* as a dict, without 'pageNo'."""
    # unicodecsv decodes bytes itself, so the file is opened in binary mode;
    # the ``with`` block closes it once the rows have been consumed.
    with open(path, "rb") as source:
        for row in csv.DictReader(source):
            # 'pageNo' is not one of the output columns; drop it if present.
            row.pop("pageNo", None)
            yield row


# Sets give constant-time membership tests for the de-duplication checks.
hotels = set()         # hotel names already written to the output
hotelswithbwt = set()  # (hotelName, bookDirect, city) keys written in pass 1
yes = 0                # rows written by pass 1
rest = 0               # rows written by pass 2

with open("/home/sharat/Documents/" + date + "tripadvisor.csv", "wb") as file1:
    mycsv = csv.DictWriter(file1, fieldnames=keys, encoding="utf-8")
    mycsv.writeheader()

    # Pass 1: rows flagged bookDirect == 'YES', one per (name, flag, city).
    for path in files:
        for row in _read_rows(path):
            if row["bookDirect"] != "YES":
                continue
            # Built only for 'YES' rows, as in the original short-circuit.
            identity = (row["hotelName"], row["bookDirect"], row["city"])
            if identity in hotelswithbwt:
                continue
            hotelswithbwt.add(identity)
            hotels.add(row["hotelName"])
            mycsv.writerow(row)
            yes += 1

    # Pass 2: one row for every hotel name not written so far.
    for path in files:
        for row in _read_rows(path):
            if row["hotelName"] in hotels:
                continue
            hotels.add(row["hotelName"])
            mycsv.writerow(row)
            rest += 1

print(yes)
print(rest)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement