Advertisement
Guest User

Untitled

a guest
Oct 16th, 2019
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.12 KB | None | 0 0
  1. #!/usr/bin/python2.7
  2. # mapper.py
  3. import sys
  4.  
  5. SKIPVAL = '\\N'
  6.  
  7. for line in sys.stdin:
  8. fields = line.strip().split('\t') # Split title to fields using the data delimeter
  9.  
  10. title = "?"
  11. release = "?"
  12. titleType = "?"
  13. title_id = "?"
  14. rating = "?"
  15. voted = "?"
  16. # Select the required field
  17. if len(fields) == 9:
  18. title_id = fields[0]
  19. titleType = fields[1]
  20. title = fields[2]
  21. release = fields[5]
  22.  
  23. if release == SKIPVAL:
  24. continue
  25. if titleType == SKIPVAL:
  26. continue
  27. if title_id == SKIPVAL:
  28. continue
  29.  
  30. if (titleType != "movie" or (int(release) < 1990) or (int(release) > 2018)):
  31. continue
  32.  
  33. else:
  34. title_id = fields[0]
  35. rating = fields[1]
  36. voted = fields[2]
  37.  
  38. if rating == SKIPVAL:
  39. continue
  40. if voted == SKIPVAL:
  41. continue
  42.  
  43. if ((float(rating) < 7.5) or (int(voted) < 500000)):
  44. continue
  45.  
  46.  
  47. print(str(title_id) + "|" + str(title) + "|" + str(rating) + "|" + str(voted))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement