Advertisement
cr_sharat

Tripadvisor

Oct 5th, 2015
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.29 KB | None | 0 0
  1. import unicodecsv as csv
  2. from datetime import datetime
  3. date = datetime.today().strftime("%Y%m%d")
  4. import glob
  5. files = glob.glob("/home/sharat/Downloads/601_rory_tripadvisor_hotel*.csv")
  6. print files
  7. hotels,hotelswithbwt = [],[]
  8. count = 0
  9. keys = ["hotelName","partnerName","bookDirect","bookingSite1","bookingSite2","bookingSite3","bookingSites","noOfReviews","starReviews","checkIn","checkOut","city","hotelPage"]
  10. file1 = open("/home/sharat/Documents/"+date+"tripadvisor.csv","w")
  11. mycsv = csv.DictWriter(file1,fieldnames=keys,encoding='utf-8')
  12. mycsv.writeheader()
  13. hotelswithyes = []
  14. for i in files:
  15. total = list(csv.DictReader(open(i,'r')))
  16. for j in total:
  17. try:
  18. j.pop('pageNo')
  19. except:
  20. pass
  21. if j['bookDirect'] == 'YES' and (j['hotelName'],j['bookDirect'],j['city']) not in hotelswithbwt:
  22. count = count + 1
  23. hotelswithbwt.append((j['hotelName'],j['bookDirect'],j['city']))
  24. hotels.append(j['hotelName'])
  25. mycsv.writerow(j)
  26. yes = count
  27. count = 0
  28. for i in files:
  29. total = list(csv.DictReader(open(i,'r')))
  30. for j in total:
  31. try:
  32. j.pop('pageNo')
  33. except:
  34. pass
  35. if j['hotelName'] not in hotels:
  36. count = count + 1
  37. # print count
  38. hotels.append(j['hotelName'])
  39. mycsv.writerow(j)
  40. rest = count
  41. file1.close()
  42. print yes
  43. # print yes
  44. print rest
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement