Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python2.7
# mapper.py
"""Filter tab-separated title rows and rating rows read from standard input.

A row with exactly nine fields is treated as a title row: id, type and title
in the first three columns, release year in the sixth.  Every other row is
treated as a rating row: id, rating and vote count in the first three columns.

Rows that pass the filters are printed as ``title_id|title|rating|voted``,
with ``?`` in the positions that the row kind does not carry.

NOTE(review): the two row shapes presumably come from two input files that a
later step joins on ``title_id`` -- confirm against the consumer of this
output.
"""
import sys

# Rows whose filtered columns hold this literal (backslash + N) are dropped.
SKIPVAL = '\\N'

PLACEHOLDER = "?"        # emitted for values the current row kind lacks
TITLE_FIELD_COUNT = 9    # a row with exactly this many fields is a title row
RATING_FIELD_COUNT = 3   # minimum number of fields a rating row must have
MIN_RELEASE = 1990       # inclusive lower bound on the release year
MAX_RELEASE = 2018       # inclusive upper bound on the release year
MIN_RATING = 7.5         # inclusive lower bound on the rating
MIN_VOTES = 500000       # inclusive lower bound on the vote count


def map_line(line):
    """Return the output record for one input line, or None to drop it.

    line -- one raw input line; surrounding whitespace is stripped before the
            line is split on tab characters.

    Title rows are kept only when the type is "movie" and the release year
    lies in [MIN_RELEASE, MAX_RELEASE].  Rating rows are kept only when the
    rating is at least MIN_RATING and the vote count at least MIN_VOTES.
    Rows that are too short or hold non-numeric text in a numeric column
    (a header row, for example) are dropped instead of raising.
    """
    fields = line.strip().split('\t')
    title = rating = voted = PLACEHOLDER

    if len(fields) == TITLE_FIELD_COUNT:
        title_id, title_type, title = fields[0], fields[1], fields[2]
        release = fields[5]
        if SKIPVAL in (release, title_type, title_id):
            return None
        # Test the type first so non-movie rows never reach the int() parse.
        if title_type != "movie":
            return None
        try:
            year = int(release)
        except ValueError:
            return None
        if year < MIN_RELEASE or year > MAX_RELEASE:
            return None
    else:
        # Blank or truncated lines cannot supply id, rating and vote count.
        if len(fields) < RATING_FIELD_COUNT:
            return None
        title_id, rating, voted = fields[0], fields[1], fields[2]
        if SKIPVAL in (rating, voted):
            return None
        try:
            too_low = float(rating) < MIN_RATING or int(voted) < MIN_VOTES
        except ValueError:
            # Non-numeric text, e.g. a column-header row.
            return None
        if too_low:
            return None

    return "|".join((title_id, title, rating, voted))


def main():
    """Write the record of every accepted stdin line to stdout."""
    for line in sys.stdin:
        record = map_line(line)
        if record is not None:
            print(record)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement