Advertisement
Guest User

Untitled

a guest
Jul 31st, 2014
221
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.44 KB | None | 0 0
  1. #Mapper
  2. #!/usr/bin/python
  3. import sys
  4.  
  5. for line in sys.stdin:
  6. data = line.strip().split("GET ")
  7. if(len(data) > 1):
  8. #docname = data[1].split(" ")[0]
  9. #docname = data[0].split("-")
  10. #ip = docname[0]
  11. #date = docname[2].split("[")[1]
  12. #print "{0}\t{1}".format(docname, 1)
  13. #print ip, date
  14. part2 = data[1].split(" ")
  15. path = part2[0]
  16. #protocol = part2[1].split("\"")[0]
  17. #status = part2[2]
  18. #size = part2[3]
  19. #if path in '/images/filmmediablock/360/Chacha.jpg' :
  20. from urlparse import urlparse
  21. parsed = urlparse(path)
  22. print parsed.path+parsed.query
  23.  
  24. #Reducer
  25.  
  26. #!/usr/bin/python
  27.  
  28. # Write a MapReduce program which will display the number of hits for each different file on the Web site.
  29.  
  30. import sys
  31.  
  32. countTotal = 0
  33. oldKey = None
  34. highestTotal =0
  35. highestKey = None
  36. # Loop around the data
  37. # It will be in the format key\tval
  38. #
  39.  
  40. for line in sys.stdin:
  41. thisKey = line
  42. if oldKey and oldKey != thisKey:
  43. if countTotal > highestTotal:
  44. highestTotal = countTotal
  45. highestKey = thisKey
  46. oldKey = thisKey;
  47. countTotal = 0
  48.  
  49. oldKey = thisKey
  50. countTotal += 1
  51.  
  52. print highestKey, "\t", highestTotal
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement